junshan-kit 2.7.3__py2.py3-none-any.whl → 2.8.5__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- junshan_kit/DataProcessor.py +48 -1
- junshan_kit/FiguresHub.py +18 -14
- junshan_kit/OptimizerHup/SPBM.py +6 -4
- junshan_kit/ParametersHub.py +18 -2
- junshan_kit/Print_Info.py +2 -2
- junshan_kit/TrainingHub.py +2 -2
- {junshan_kit-2.7.3.dist-info → junshan_kit-2.8.5.dist-info}/METADATA +1 -1
- {junshan_kit-2.7.3.dist-info → junshan_kit-2.8.5.dist-info}/RECORD +9 -9
- {junshan_kit-2.7.3.dist-info → junshan_kit-2.8.5.dist-info}/WHEEL +0 -0
junshan_kit/DataProcessor.py
CHANGED
|
@@ -8,9 +8,11 @@
|
|
|
8
8
|
import pandas as pd
|
|
9
9
|
import numpy as np
|
|
10
10
|
import torch, bz2
|
|
11
|
+
from typing import Optional
|
|
11
12
|
from torch.utils.data import random_split, Subset
|
|
12
13
|
from sklearn.datasets import load_svmlight_file
|
|
13
14
|
from sklearn.preprocessing import StandardScaler
|
|
15
|
+
from junshan_kit import ParametersHub
|
|
14
16
|
|
|
15
17
|
|
|
16
18
|
class CSV_TO_Pandas:
|
|
@@ -386,7 +388,7 @@ def get_libsvm_bz2_data(train_path, test_path, data_name, Paras, split = True):
|
|
|
386
388
|
transform = "-1 → 0 for binary, y-1 for multi-class"
|
|
387
389
|
train_data = LibSVMDataset_bz2(train_path)
|
|
388
390
|
|
|
389
|
-
if data_name in ["Duke", "Ijcnn"]:
|
|
391
|
+
if data_name in ["Duke", "Ijcnn", "RCV1"]:
|
|
390
392
|
test_data = LibSVMDataset_bz2(test_path)
|
|
391
393
|
split = False
|
|
392
394
|
else:
|
|
@@ -410,3 +412,48 @@ def get_libsvm_bz2_data(train_path, test_path, data_name, Paras, split = True):
|
|
|
410
412
|
# assert False
|
|
411
413
|
|
|
412
414
|
return train_dataset, test_dataset, transform
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
def subset(dataset, ratio_or_num, seed=None) -> Subset:
|
|
418
|
+
"""
|
|
419
|
+
Randomly sample a subset from a dataset.
|
|
420
|
+
|
|
421
|
+
Parameters
|
|
422
|
+
----------
|
|
423
|
+
dataset : torch.utils.data.Dataset
|
|
424
|
+
The dataset to sample from.
|
|
425
|
+
ratio_or_num : float or int
|
|
426
|
+
If float in (0, 1], treated as sampling ratio.
|
|
427
|
+
Otherwise, treated as absolute number of samples.
|
|
428
|
+
seed : int, optional
|
|
429
|
+
Random seed for reproducibility.
|
|
430
|
+
|
|
431
|
+
Returns
|
|
432
|
+
-------
|
|
433
|
+
torch.utils.data.Subset
|
|
434
|
+
A randomly sampled subset of the dataset.
|
|
435
|
+
"""
|
|
436
|
+
|
|
437
|
+
if ratio_or_num < 0:
|
|
438
|
+
raise ValueError(f"ratio_or_num must be non-negative, got {ratio_or_num}")
|
|
439
|
+
|
|
440
|
+
dataset_len = len(dataset)
|
|
441
|
+
|
|
442
|
+
# Determine number of samples
|
|
443
|
+
if isinstance(ratio_or_num, float) and 0 < ratio_or_num <= 1:
|
|
444
|
+
num = max(1, int(round(dataset_len * ratio_or_num)))
|
|
445
|
+
else:
|
|
446
|
+
num = int(ratio_or_num)
|
|
447
|
+
|
|
448
|
+
# Clamp to valid range
|
|
449
|
+
num = min(max(num, 1), dataset_len)
|
|
450
|
+
|
|
451
|
+
# Create and seed generator
|
|
452
|
+
generator = torch.Generator()
|
|
453
|
+
if seed is not None:
|
|
454
|
+
generator.manual_seed(seed)
|
|
455
|
+
|
|
456
|
+
# Random sampling
|
|
457
|
+
indices = torch.randperm(dataset_len, generator=generator)[:num].tolist()
|
|
458
|
+
|
|
459
|
+
return Subset(dataset, indices)
|
junshan_kit/FiguresHub.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
----------------------------------------------------------------------
|
|
3
3
|
>>> Author : Junshan Yin
|
|
4
|
-
>>> Last Updated : 2025-
|
|
4
|
+
>>> Last Updated : 2025-12-19
|
|
5
5
|
----------------------------------------------------------------------
|
|
6
6
|
"""
|
|
7
7
|
import math, os
|
|
@@ -22,6 +22,8 @@ def marker_schedule(marker_schedule=None):
|
|
|
22
22
|
"SPSmax": "4", # tri-right
|
|
23
23
|
"SPBM-PF": "*", # star
|
|
24
24
|
"SPBM-TR": "s", # star
|
|
25
|
+
"SPBM-PF-NoneCut": "s", # circle
|
|
26
|
+
"SPBM-TR-NoneCut": "s", # circle
|
|
25
27
|
}
|
|
26
28
|
|
|
27
29
|
else:
|
|
@@ -64,6 +66,8 @@ def colors_schedule(colors_schedule=None):
|
|
|
64
66
|
"SPSmax": "#BA6262",
|
|
65
67
|
"SPBM-PF": "#1f77b4",
|
|
66
68
|
"SPBM-TR": "#d62728",
|
|
69
|
+
"SPBM-PF-NoneCut": "#8c564b",
|
|
70
|
+
"SPBM-TR-NoneCut": "#e377c2",
|
|
67
71
|
}
|
|
68
72
|
|
|
69
73
|
else:
|
|
@@ -112,7 +116,7 @@ def Search_Paras(Paras, args, model_name, data_name, optimizer_name, metric_key
|
|
|
112
116
|
fig.delaxes(axes[i])
|
|
113
117
|
|
|
114
118
|
|
|
115
|
-
plt.suptitle(f'{model_name} on {data_name} - {optimizer_name} (training
|
|
119
|
+
plt.suptitle(f'{model_name} on {data_name} - {optimizer_name}, (training, test) = ({Paras['train_data_num']}/{Paras['train_data_all_num']}, {Paras['test_data_num']}/{Paras['test_data_all_num']}), {Paras["device"]}, batch_size: {Paras["batch_size"]}', fontsize=16)
|
|
116
120
|
plt.tight_layout(rect=(0, 0, 1, 0.9))
|
|
117
121
|
|
|
118
122
|
filename = f'{Paras["Results_folder"]}/{metric_key}_{ParametersHub.model_abbr(model_name)}_{data_name}_{optimizer_name}.pdf'
|
|
@@ -220,19 +224,19 @@ def Mul_Plot(model_name, info_dict, Exp_name = "SPBM", cols = 3, save_path = Non
|
|
|
220
224
|
)
|
|
221
225
|
|
|
222
226
|
plt.tight_layout()
|
|
223
|
-
if
|
|
224
|
-
|
|
227
|
+
if save_path is None:
|
|
228
|
+
save_path_ = f'{model_name}.pdf'
|
|
225
229
|
else:
|
|
226
|
-
os.makedirs(
|
|
227
|
-
|
|
228
|
-
plt.savefig(
|
|
230
|
+
os.makedirs(save_path, exist_ok=True)
|
|
231
|
+
save_path_ = f'{save_path}/{save_name}.pdf'
|
|
232
|
+
plt.savefig(save_path_, bbox_inches="tight")
|
|
229
233
|
if fig_show:
|
|
230
234
|
plt.show()
|
|
231
235
|
plt.close() # Colse the fig
|
|
232
236
|
|
|
233
237
|
|
|
234
238
|
|
|
235
|
-
def Opt_Paras_Plot(model_name, info_dict, Exp_name = "SPBM",
|
|
239
|
+
def Opt_Paras_Plot(model_name, info_dict, Exp_name = "SPBM", save_path = None, save_name = None, fig_show = False):
|
|
236
240
|
|
|
237
241
|
mpl.rcParams['font.family'] = 'Times New Roman'
|
|
238
242
|
mpl.rcParams["mathtext.fontset"] = "stix"
|
|
@@ -271,16 +275,16 @@ def Opt_Paras_Plot(model_name, info_dict, Exp_name = "SPBM", svae_path = None, s
|
|
|
271
275
|
plt.tight_layout() # Adjust layout so the legend fits
|
|
272
276
|
plt.xlabel("epochs") # Or whatever your x-axis represents
|
|
273
277
|
plt.ylabel(f'{ParametersHub.fig_ylabel(info_dict[data_name]["metric_key"])}')
|
|
274
|
-
if
|
|
275
|
-
|
|
278
|
+
if save_path is None:
|
|
279
|
+
save_path_ = f'{model_name}.pdf'
|
|
276
280
|
else:
|
|
277
|
-
os.makedirs(
|
|
278
|
-
|
|
279
|
-
plt.savefig(
|
|
280
|
-
|
|
281
|
+
os.makedirs(save_path, exist_ok=True)
|
|
282
|
+
save_path_ = f'{save_path}/{save_name}.pdf'
|
|
283
|
+
plt.savefig(save_path_, bbox_inches="tight")
|
|
281
284
|
if fig_show:
|
|
282
285
|
plt.show()
|
|
283
286
|
|
|
284
287
|
plt.close()
|
|
285
288
|
|
|
286
289
|
|
|
290
|
+
|
junshan_kit/OptimizerHup/SPBM.py
CHANGED
|
@@ -226,7 +226,6 @@ class TR_NoneCut(Optimizer):
|
|
|
226
226
|
super().__init__(params, defaults)
|
|
227
227
|
self.model = model
|
|
228
228
|
self.cutting_num = hyperparams['cutting_number']
|
|
229
|
-
self.M = hyperparams['M']
|
|
230
229
|
self.delta = hyperparams['delta']
|
|
231
230
|
self.Paras = Paras
|
|
232
231
|
|
|
@@ -254,7 +253,9 @@ class TR_NoneCut(Optimizer):
|
|
|
254
253
|
Gk, rk, ek = SPBM_func.get_var(x_his, f_his, g_his, self.delta)
|
|
255
254
|
|
|
256
255
|
# SOVER (dual)
|
|
257
|
-
xk = SPBM_func.subproblem_tr_NoneLower(Gk, ek, xk, rk, self.Paras)
|
|
256
|
+
# xk = SPBM_func.subproblem_tr_NoneLower(Gk, ek, xk, rk, self.Paras)
|
|
257
|
+
|
|
258
|
+
xk = SPBM_func.subproblem_tr_2(Gk, ek, xk, rk, self.Paras)
|
|
258
259
|
|
|
259
260
|
# print(len(self.f_his))
|
|
260
261
|
vector_to_parameters(xk, self.model.parameters())
|
|
@@ -312,7 +313,6 @@ class PF_NoneCut(Optimizer):
|
|
|
312
313
|
super().__init__(params, defaults)
|
|
313
314
|
self.model = model
|
|
314
315
|
self.cutting_num = hyperparams['cutting_number']
|
|
315
|
-
self.M = hyperparams['M']
|
|
316
316
|
self.delta = hyperparams['delta']
|
|
317
317
|
self.Paras = Paras
|
|
318
318
|
|
|
@@ -340,7 +340,9 @@ class PF_NoneCut(Optimizer):
|
|
|
340
340
|
Gk, rk, ek = SPBM_func.get_var(x_his, f_his, g_his, self.delta)
|
|
341
341
|
|
|
342
342
|
# SOVER (dual)
|
|
343
|
-
xk = SPBM_func.subproblem_pf_NoneLower(Gk, ek, xk, self.delta, self.Paras)
|
|
343
|
+
# xk = SPBM_func.subproblem_pf_NoneLower(Gk, ek, xk, self.delta, self.Paras)
|
|
344
|
+
|
|
345
|
+
xk = SPBM_func.subproblem_pf(Gk, ek, xk, self.delta, self.Paras)
|
|
344
346
|
|
|
345
347
|
# print(len(self.f_his))
|
|
346
348
|
vector_to_parameters(xk, self.model.parameters())
|
junshan_kit/ParametersHub.py
CHANGED
|
@@ -59,6 +59,8 @@ class args:
|
|
|
59
59
|
"AIP": "Adult_Income_Prediction",
|
|
60
60
|
"CCFD": "Credit_Card_Fraud_Detection",
|
|
61
61
|
"Ijcnn": "Ijcnn",
|
|
62
|
+
"RCV1": "RCV1",
|
|
63
|
+
"w8a": "w8a",
|
|
62
64
|
"DHI":"Diabetes_Health_Indicators",
|
|
63
65
|
"EVP": "Electric_Vehicle_Population",
|
|
64
66
|
"GHP": "Global_House_Purchase",
|
|
@@ -166,6 +168,13 @@ class args:
|
|
|
166
168
|
# required=True,
|
|
167
169
|
help = "search_grid: 1: "
|
|
168
170
|
)
|
|
171
|
+
|
|
172
|
+
parser.add_argument(
|
|
173
|
+
"--OptParas",
|
|
174
|
+
type=int,
|
|
175
|
+
nargs=1,
|
|
176
|
+
help="Number of optimization steps for parameter tuning (default: 1)"
|
|
177
|
+
)
|
|
169
178
|
# <args_from_command>
|
|
170
179
|
|
|
171
180
|
args = parser.parse_args()
|
|
@@ -177,16 +186,19 @@ class args:
|
|
|
177
186
|
# <args>
|
|
178
187
|
|
|
179
188
|
def UpdateOtherParas(args, OtherParas):
|
|
189
|
+
# <time_str>
|
|
180
190
|
if args.time_str is not None:
|
|
181
191
|
time_str = args.time_str[0]
|
|
182
192
|
else:
|
|
183
193
|
time_str = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
|
184
194
|
|
|
195
|
+
# <user_search_grid>
|
|
185
196
|
if args.user_search_grid is not None:
|
|
186
197
|
OtherParas["user_search_grid"] = args.user_search_grid[0]
|
|
187
198
|
else:
|
|
188
199
|
OtherParas["user_search_grid"] = None
|
|
189
200
|
|
|
201
|
+
# <send_email>
|
|
190
202
|
if args.send_email is not None:
|
|
191
203
|
OtherParas["from_email"] = args.send_email[0]
|
|
192
204
|
OtherParas["to_email"] = args.send_email[1]
|
|
@@ -195,6 +207,11 @@ def UpdateOtherParas(args, OtherParas):
|
|
|
195
207
|
else:
|
|
196
208
|
OtherParas["send_email"] = False
|
|
197
209
|
|
|
210
|
+
if args.OptParas is not None:
|
|
211
|
+
OtherParas["SeleParasOn"] = False
|
|
212
|
+
else:
|
|
213
|
+
OtherParas["SeleParasOn"] = True
|
|
214
|
+
|
|
198
215
|
OtherParas["time_str"] = time_str
|
|
199
216
|
OtherParas["results_folder_name"] = f'Results_{OtherParas["exp_name"]}'
|
|
200
217
|
|
|
@@ -428,7 +445,6 @@ def optimizer_paras_dict(Paras, OtherParas)->dict:
|
|
|
428
445
|
# ----------- SPBM-TR-NoneCut -----------
|
|
429
446
|
"SPBM-TR-NoneCut": {
|
|
430
447
|
"params": {
|
|
431
|
-
"M": [1e-5],
|
|
432
448
|
"delta": (
|
|
433
449
|
[2**i for i in range(-8, 9)]
|
|
434
450
|
if OtherParas["SeleParasOn"]
|
|
@@ -453,7 +469,6 @@ def optimizer_paras_dict(Paras, OtherParas)->dict:
|
|
|
453
469
|
# ----------- SPBM-PF-NoneCut -----------
|
|
454
470
|
"SPBM-PF-NoneCut": {
|
|
455
471
|
"params": {
|
|
456
|
-
"M": [1e-5],
|
|
457
472
|
"delta": (
|
|
458
473
|
[2**i for i in range(-8, 9)]
|
|
459
474
|
if OtherParas["SeleParasOn"]
|
|
@@ -592,6 +607,7 @@ def set_marker_point(epoch_num: int) -> list:
|
|
|
592
607
|
6: [0, 2, 4, 6],
|
|
593
608
|
8: [0, 2, 4, 6, 8],
|
|
594
609
|
10: [0, 2, 4, 6, 8, 10],
|
|
610
|
+
50: [0, 10, 20, 30, 40, 50],
|
|
595
611
|
100: [0, 20, 40, 60, 80, 100],
|
|
596
612
|
200: [0, 40, 80, 120, 160, 200],
|
|
597
613
|
}
|
junshan_kit/Print_Info.py
CHANGED
|
@@ -13,7 +13,7 @@ def training_info(args, data_name, optimizer_name, hyperparams, Paras, model_nam
|
|
|
13
13
|
if Paras["use_color"]:
|
|
14
14
|
print("\033[90m" + "-" * 115 + "\033[0m")
|
|
15
15
|
print(
|
|
16
|
-
f"\033[32m✅ \033[34mDataset:\033[32m {data_name}, \t\033[34mBatch-size:\033[32m {args.bs}, \t\033[34m(training, test) = \033[32m ({Paras['train_data_num']}, {Paras['test_data_num']}), \t\033[34m device:\033[32m {Paras['device']}"
|
|
16
|
+
f"\033[32m✅ \033[34mDataset:\033[32m {data_name}, \t\033[34mBatch-size:\033[32m {args.bs}, \t\033[34m(training, test) = \033[32m ({Paras['train_data_num']}/{Paras['train_data_all_num']}, {Paras['test_data_num']}/{Paras['test_data_all_num']}), \t\033[34m device:\033[32m {Paras['device']}"
|
|
17
17
|
)
|
|
18
18
|
print(
|
|
19
19
|
f"\033[32m✅ \033[34mOptimizer:\033[32m {optimizer_name}, \t\033[34mParams:\033[32m {hyperparams}"
|
|
@@ -27,7 +27,7 @@ def training_info(args, data_name, optimizer_name, hyperparams, Paras, model_nam
|
|
|
27
27
|
else:
|
|
28
28
|
print("-" * 115)
|
|
29
29
|
print(
|
|
30
|
-
f"✅ Dataset: {data_name}, \tBatch-size: {args.bs}, \t(training, test) = ({Paras['train_data_num']}, {Paras['test_data_num']}), \tdevice: {Paras['device']}"
|
|
30
|
+
f"✅ Dataset: {data_name}, \tBatch-size: {args.bs}, \t(training, test) = ({Paras['train_data_num']}/{Paras['train_data_all_num']}, {Paras['test_data_num']}/{Paras['test_data_all_num']}), \tdevice: {Paras['device']}"
|
|
31
31
|
)
|
|
32
32
|
print(f"✅ Optimizer: {optimizer_name}, \tParams: {hyperparams}")
|
|
33
33
|
print(
|
junshan_kit/TrainingHub.py
CHANGED
|
@@ -90,8 +90,8 @@ def load_data(model_name, data_name, Paras):
|
|
|
90
90
|
assert False
|
|
91
91
|
|
|
92
92
|
# Computing the number of data
|
|
93
|
-
Paras["train_data_num"] = len(train_dataset)
|
|
94
|
-
Paras["
|
|
93
|
+
Paras["train_data_num"], Paras["test_data_num"] = len(train_dataset), len(test_dataset)
|
|
94
|
+
Paras["train_data_all_num"], Paras["test_data_all_num"] = len(train_dataset), len(test_dataset)
|
|
95
95
|
|
|
96
96
|
return train_dataset, test_dataset, Paras
|
|
97
97
|
|
|
@@ -1,20 +1,20 @@
|
|
|
1
1
|
junshan_kit/BenchmarkFunctions.py,sha256=tXgZGg-CjTNz78nMyVEQflVFIJDgmmePytXjY_RT9BM,120
|
|
2
2
|
junshan_kit/Check_Info.py,sha256=Z6Ls2S7Fl4h8S9s0NB8jP_YpSLZInvQAeyjIXzq5Bpc,1872
|
|
3
3
|
junshan_kit/DataHub.py,sha256=6RCNr8dBTqK-8ey4m-baMU1qOsJP6swOFkaraGdk0fM,6801
|
|
4
|
-
junshan_kit/DataProcessor.py,sha256=
|
|
4
|
+
junshan_kit/DataProcessor.py,sha256=Uc9ixhnVmGf5PoGIe3vvhobH_ADtDAosG9MTjnB1KDQ,15677
|
|
5
5
|
junshan_kit/DataSets.py,sha256=DcpwWRm1_B29hIDjOhvaeKAYYeBknEW2QqsS_qm8Hxs,13367
|
|
6
6
|
junshan_kit/Evaluate_Metrics.py,sha256=PQBGU8fETIvDon1VMdouZ1dhG2n7XHYGbzs2EQUA9FM,3392
|
|
7
|
-
junshan_kit/FiguresHub.py,sha256=
|
|
7
|
+
junshan_kit/FiguresHub.py,sha256=TVbo9ioEECrH_iJjpt0HgkCoiAdFEcTdtiUtzDNYrJY,10455
|
|
8
8
|
junshan_kit/ModelsHub.py,sha256=xM6cwLecq9vukrt1c9l7l9dy7mQn3yq0ZwrRg5f_CfM,7995
|
|
9
|
-
junshan_kit/ParametersHub.py,sha256=
|
|
10
|
-
junshan_kit/Print_Info.py,sha256=
|
|
11
|
-
junshan_kit/TrainingHub.py,sha256=
|
|
9
|
+
junshan_kit/ParametersHub.py,sha256=_LvkdV95vKSU4h2LtF7W63EaF5mRBg85ZHf0ymb28tA,20248
|
|
10
|
+
junshan_kit/Print_Info.py,sha256=7pfd_mGEuQdQGyz6kcSSvjVRCrPgi5RafQgi7ZSS9VU,4890
|
|
11
|
+
junshan_kit/TrainingHub.py,sha256=unoI8zzm0oekUxz-3retHCFhxwx6j8e6Tp9VQDywTPg,11565
|
|
12
12
|
junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
13
|
junshan_kit/kit.py,sha256=tQGoJJQZW9BeadX2cuwhvOxX2riHBZG0iFExelS4MIY,11487
|
|
14
14
|
junshan_kit/OptimizerHup/OptimizerFactory.py,sha256=x1_cE5ZSkKffdY0uCIirocBNj2X-u_R-V5jNawJ1EfA,4607
|
|
15
|
-
junshan_kit/OptimizerHup/SPBM.py,sha256=
|
|
15
|
+
junshan_kit/OptimizerHup/SPBM.py,sha256=h449QddeN0MvUIQeKcNxFsdxdBuhN354sGc_sN2LZR8,13816
|
|
16
16
|
junshan_kit/OptimizerHup/SPBM_func.py,sha256=5Fz6eHYIVGMoR_CBDA_Xk_1dnPRq3K16DUNoNaWQ2Ag,17301
|
|
17
17
|
junshan_kit/OptimizerHup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
|
-
junshan_kit-2.
|
|
19
|
-
junshan_kit-2.
|
|
20
|
-
junshan_kit-2.
|
|
18
|
+
junshan_kit-2.8.5.dist-info/METADATA,sha256=vxUXFvPqaGc2liYh0A3B4CmlbdlrK1CVdRNx0i7RoRA,455
|
|
19
|
+
junshan_kit-2.8.5.dist-info/WHEEL,sha256=aha0VrrYvgDJ3Xxl3db_g_MDIW-ZexDdrc_m-Hk8YY4,105
|
|
20
|
+
junshan_kit-2.8.5.dist-info/RECORD,,
|
|
File without changes
|