junshan-kit 2.5.1__py2.py3-none-any.whl → 2.7.3__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- junshan_kit/BenchmarkFunctions.py +7 -0
- junshan_kit/Check_Info.py +44 -0
- junshan_kit/DataHub.py +108 -8
- junshan_kit/DataProcessor.py +86 -8
- junshan_kit/DataSets.py +29 -30
- junshan_kit/Evaluate_Metrics.py +75 -2
- junshan_kit/FiguresHub.py +286 -0
- junshan_kit/ModelsHub.py +32 -5
- junshan_kit/OptimizerHup/OptimizerFactory.py +130 -0
- junshan_kit/OptimizerHup/SPBM.py +350 -0
- junshan_kit/OptimizerHup/SPBM_func.py +602 -0
- junshan_kit/OptimizerHup/__init__.py +0 -0
- junshan_kit/ParametersHub.py +390 -119
- junshan_kit/Print_Info.py +57 -11
- junshan_kit/TrainingHub.py +190 -40
- junshan_kit/kit.py +39 -50
- {junshan_kit-2.5.1.dist-info → junshan_kit-2.7.3.dist-info}/METADATA +7 -1
- junshan_kit-2.7.3.dist-info/RECORD +20 -0
- {junshan_kit-2.5.1.dist-info → junshan_kit-2.7.3.dist-info}/WHEEL +1 -1
- junshan_kit-2.5.1.dist-info/RECORD +0 -13
junshan_kit/Print_Info.py
CHANGED
@@ -1,4 +1,4 @@
-from junshan_kit import ParametersHub
+from junshan_kit import DataSets, ParametersHub
 
 
 # -------------------------------------------------------------
@@ -9,8 +9,8 @@ def training_group(training_group):
     print(f"-------------------------------------------------------")
 
 
-def training_info(args,
-    if use_color:
+def training_info(args, data_name, optimizer_name, hyperparams, Paras, model_name):
+    if Paras["use_color"]:
         print("\033[90m" + "-" * 115 + "\033[0m")
         print(
             f"\033[32m✅ \033[34mDataset:\033[32m {data_name}, \t\033[34mBatch-size:\033[32m {args.bs}, \t\033[34m(training, test) = \033[32m ({Paras['train_data_num']}, {Paras['test_data_num']}), \t\033[34m device:\033[32m {Paras['device']}"
@@ -21,27 +21,43 @@ def training_info(args, use_color, data_name, optimizer_name, folder_path, hyper
         print(
             f'\033[32m✅ \033[34mmodel:\033[32m {model_name}, \t\033[34mmodel type:\033[32m {Paras["model_type"][model_name]},\t\033[34m loss_fn:\033[32m {Paras["loss_fn"]}'
         )
-        print(f
+        print(f'\033[32m✅ \033[34mResults_folder:\033[32m {Paras["Results_folder"]}')
         print("\033[90m" + "-" * 115 + "\033[0m")
 
     else:
         print("-" * 115)
         print(
-            f"✅ Dataset: {data_name}, \tBatch-size: {
+            f"✅ Dataset: {data_name}, \tBatch-size: {args.bs}, \t(training, test) = ({Paras['train_data_num']}, {Paras['test_data_num']}), \tdevice: {Paras['device']}"
         )
         print(f"✅ Optimizer: {optimizer_name}, \tParams: {hyperparams}")
         print(
-            f
+            f"✅ model: {model_name}, \tmodel type: {Paras['model_type'][model_name]}, \tloss_fn: {Paras['loss_fn']}"
         )
-        print(f"✅
+        print(f"✅ Results_folder: {Paras['Results_folder']}")
         print("-" * 115)
 
 # <Step_7_2>
 
-def
+def per_epoch_info(Paras, epoch, metrics, time):
+    if Paras["use_color"]:
+        print(
+            f'\033[34m epoch = \033[32m{epoch+1}/{Paras["epochs"]}\033[0m,\t\b'
+            f'\033[34m training_loss = \033[32m{metrics["training_loss"][epoch+1]:.4e}\033[0m,\t\b'
+            f'\033[34m training_acc = \033[32m{100 * metrics["training_acc"][epoch+1]:.2f}\033[0m,\t\b'
+            f'\033[34m time = \033[32m{time:.2f}\033[0m,\t\b')
+
+    else:
+        print(
+            f"epoch = {epoch+1}/{Paras['epochs']},\t"
+            f"training_loss = {metrics['training_loss'][epoch+1]:.4e},\t"
+            f"training_acc = {100 * metrics['training_acc'][epoch+1]:.2f}%,\t"
+            f"time = {time:.2f}"
+        )
+
+def print_per_epoch_info(epoch, Paras, epoch_loss, training_loss, training_acc, test_loss, test_acc, run_time):
     epochs = Paras["epochs"][Paras["data_name"]]
     # result = [(k, f"{v:.4f}") for k, v in run_time.items()]
-    if use_color:
+    if Paras["use_color"]:
         print(
             f'\033[34m epoch = \033[32m{epoch+1}/{epochs}\033[0m,\t\b'
             f'\033[34m epoch_loss = \033[32m{epoch_loss[epoch+1]:.4e}\033[0m,\t\b'
@@ -59,5 +75,35 @@ def print_per_epoch_info(use_color, epoch, Paras, epoch_loss, training_loss, tra
             f'time (ep, tr, te) = ({run_time["epoch"]:.2f}, {run_time["train"]:.2f}, {run_time["test"]:.2f})')
 
 
-def
-    print(ParametersHub.data_list.__doc__)
+def all_data_info():
+    print(ParametersHub.data_list.__doc__)
+
+def data_info_DHI():
+    data = DataSets.adult_income_prediction(print_info=True, export_csv=False)
+
+def data_info_CCFD():
+    data = DataSets.credit_card_fraud_detection(print_info=True, export_csv=False)
+
+def data_info_AIP():
+    data = DataSets.adult_income_prediction(print_info=True, export_csv=False)
+
+def data_info_EVP():
+    data = DataSets.electric_vehicle_population(print_info=True, export_csv=False)
+
+def data_info_GHP():
+    data = DataSets.global_house_purchase(print_info=True, export_csv=False)
+
+def data_info_HL():
+    data = DataSets.health_lifestyle(print_info=True, export_csv=False)
+
+def data_info_HQC():
+    data = DataSets.Homesite_Quote_Conversion(print_info=True)
+
+def data_info_IEEE_CIS():
+    data = DataSets.IEEE_CIS_Fraud_Detection(print_info=True)
+
+def data_info_MICP():
+    data = DataSets.medical_insurance_cost_prediction(print_info=True)
+
+def data_info_PPE():
+    data = DataSets.particle_physics_event_classification(print_info=True)
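Usage note: the new `per_epoch_info` reads `metrics[...][epoch+1]`, so index 0 is reserved for the pre-training evaluation, and the initial row is printed by calling it with `epoch = -1` (as `TrainingHub.train` does below). A minimal sketch of the calling convention, assuming only the `Paras` and `metrics` keys visible in this diff:

from junshan_kit import Print_Info

Paras = {"use_color": False, "epochs": 2}                    # assumed minimal keys
metrics = {"training_loss": [0.93], "training_acc": [0.45]}  # index 0 = initial row

Print_Info.per_epoch_info(Paras, -1, metrics, 0.12)          # prints metrics[...][0]

for epoch in range(Paras["epochs"]):
    # ... one epoch of training, then append the fresh evaluation ...
    metrics["training_loss"].append(0.5 / (epoch + 1))
    metrics["training_acc"].append(0.60 + 0.10 * epoch)
    Print_Info.per_epoch_info(Paras, epoch, metrics, 3.21)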
junshan_kit/TrainingHub.py
CHANGED
@@ -1,9 +1,11 @@
-import torch, time
+import torch, time, pickle
 import torch.nn as nn
 import numpy as np
 import torch.utils.data as Data
 from torch.nn.utils import parameters_to_vector
-from junshan_kit import DataHub,
+from junshan_kit import DataHub, TrainingHub, Evaluate_Metrics, DataProcessor, Print_Info, ParametersHub
+
+from junshan_kit.OptimizerHup import OptimizerFactory, SPBM
 
 def chosen_loss_fn(model_name, Paras):
     # ---------------------------------------
@@ -28,14 +30,13 @@ def chosen_loss_fn(model_name, Paras):
         assert False
 
     Paras["loss_fn"] = loss_fn
-
     return loss_fn, Paras
 
 
 def load_data(model_name, data_name, Paras):
     # load data
-    train_path = f"./exp_data/{data_name}/
-    test_path = f"./exp_data/{data_name}/
+    train_path = f"./exp_data/{data_name}/{data_name}_training"
+    test_path = f"./exp_data/{data_name}/{data_name}_test"
 
     if data_name == "MNIST":
         train_dataset, test_dataset, transform = DataHub.MNIST(Paras, model_name)
@@ -49,12 +50,28 @@ def load_data(model_name, data_name, Paras):
     elif data_name == "Credit_Card_Fraud_Detection":
         train_dataset, test_dataset, transform = DataHub.Credit_Card_Fraud_Detection(Paras)
 
+    elif data_name == "Diabetes_Health_Indicators":
+        train_dataset, test_dataset, transform = DataHub.Diabetes_Health_Indicators(Paras)
 
-
-
-
-
-
+    elif data_name == "Electric_Vehicle_Population":
+        train_dataset, test_dataset, transform = DataHub.Electric_Vehicle_Population(Paras)
+
+    elif data_name == "Global_House_Purchase":
+        train_dataset, test_dataset, transform = DataHub.Global_House_Purchase(Paras)
+
+    elif data_name == "Health_Lifestyle":
+        train_dataset, test_dataset, transform = DataHub.Health_Lifestyle(Paras)
+
+    elif data_name == "Homesite_Quote_Conversion":
+        train_dataset, test_dataset, transform = DataHub.Homesite_Quote_Conversion(Paras)
+
+    elif data_name == "TN_Weather_2020_2025":
+        train_dataset, test_dataset, transform = DataHub.TN_Weather_2020_2025(Paras)
+
+    elif data_name == "Caltech101_Resize_32":
+        train_dataset, test_dataset, transform = DataHub.Caltech101_Resize_32(
+            Paras, 0.7, split=True
+        )
 
     # elif data_name in ["Vowel", "Letter", "Shuttle", "w8a"]:
     #     Paras["train_ratio"] = Paras["split_train_data"][data_name]
@@ -62,19 +79,21 @@ def load_data(model_name, data_name, Paras):
     #         train_path + ".txt", test_path + ".txt", data_name
     #     )
 
-
-
-
-
-    # )
+    elif data_name in ["RCV1", "Duke", "Ijcnn"]:
+        train_dataset, test_dataset, transform = DataProcessor.get_libsvm_bz2_data(
+            train_path + ".bz2", test_path + ".bz2", data_name, Paras
+        )
 
     else:
         transform = None
         print(f"The data_name is error!")
         assert False
 
-
+    # Computing the number of data
+    Paras["train_data_num"] = len(train_dataset)
+    Paras["test_data_num"] = len(test_dataset)
 
+    return train_dataset, test_dataset, Paras
 
 def get_dataloader(data_name, train_dataset, test_dataset, Paras):
     ParametersHub.set_seed(Paras["seed"])
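Usage note: `load_data` now records the dataset sizes in `Paras` and returns `Paras` alongside the datasets, which feeds straight into `get_dataloader`. A hedged sketch of the pair in use; the model name and the minimal `Paras` keys shown here are assumptions, not documented defaults:

from junshan_kit import TrainingHub

Paras = {"seed": 42, "batch_size": 128, "device": "cpu"}   # assumed minimal keys
train_dataset, test_dataset, Paras = TrainingHub.load_data("MLP", "MNIST", Paras)
print(Paras["train_data_num"], Paras["test_data_num"])     # set inside load_data

train_loader, test_loader = TrainingHub.get_dataloader(
    "MNIST", train_dataset, test_dataset, Paras
)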
@@ -86,7 +105,7 @@ def get_dataloader(data_name, train_dataset, test_dataset, Paras):
         shuffle=True,
         batch_size=Paras["batch_size"],
         generator=g,
-        num_workers=
+        num_workers=0,
     )
 
     test_loader = Data.DataLoader(

@@ -94,7 +113,7 @@
         shuffle=False,
         batch_size=Paras["batch_size"],
         generator=g,
-        num_workers=
+        num_workers=0,
     )
 
     return train_loader, test_loader

@@ -110,6 +129,42 @@ def chosen_optimizer(optimizer_name, model, hyperparams, Paras):
             betas=(hyperparams["beta1"], hyperparams["beta2"]),
             eps=hyperparams["epsilon"],
         )
+
+    elif optimizer_name in ["Bundle"]:
+        optimizer = OptimizerFactory.Bundle(
+            model.parameters(), model, hyperparams, Paras
+        )
+
+    elif optimizer_name in ["ALR-SMAG"]:
+        optimizer = OptimizerFactory.ALR_SMAG(
+            model.parameters(), model, hyperparams, Paras
+        )
+
+    elif optimizer_name in ["SPBM-TR"]:
+        optimizer = SPBM.TR(model.parameters(), model, hyperparams, Paras)
+
+    elif optimizer_name in ["SPBM-TR-NoneLower"]:
+        optimizer = SPBM.TR_NoneLower(model.parameters(), model, hyperparams, Paras)
+
+    elif optimizer_name in ["SPBM-TR-NoneSpecial"]:
+        optimizer = SPBM.TR_NoneSpecial(model.parameters(), model, hyperparams, Paras)
+
+    elif optimizer_name in ["SPBM-TR-NoneCut"]:
+        optimizer = SPBM.TR_NoneCut(model.parameters(), model, hyperparams, Paras)
+
+    elif optimizer_name in ["SPBM-PF-NoneLower"]:
+        optimizer = SPBM.PF_NoneLower(model.parameters(), model, hyperparams, Paras)
+
+    elif optimizer_name in ["SPBM-PF"]:
+        optimizer = SPBM.PF(model.parameters(), model, hyperparams, Paras)
+
+    elif optimizer_name in ["SPBM-PF-NoneCut"]:
+        optimizer = SPBM.PF_NoneCut(model.parameters(), model, hyperparams, Paras)
+
+    elif optimizer_name in ["SPSmax"]:
+        optimizer = OptimizerFactory.SPSmax(
+            model.parameters(), model, hyperparams, Paras
+        )
 
     else:
         raise NotImplementedError(f"{optimizer_name} is not supported.")
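Usage note: every new branch in `chosen_optimizer` constructs its optimizer with the same `(model.parameters(), model, hyperparams, Paras)` signature, so callers only vary the name string. A sketch under the assumption that `chosen_optimizer` returns the constructed optimizer and that the hyperparameter keys are whatever the chosen variant expects:

import torch.nn as nn
from junshan_kit import TrainingHub

model = nn.Linear(10, 2)                 # any torch.nn.Module
Paras = {"device": "cpu"}                # assumed shared-state container
hyperparams = {"lr": 0.1}                # illustrative; SPBM variants may need more keys
optimizer = TrainingHub.chosen_optimizer("SPBM-TR", model, hyperparams, Paras)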
@@ -124,26 +179,56 @@ def load_model_dataloader(base_model_fun, initial_state_dict, data_name, train_d
     train_loader, test_loader = TrainingHub.get_dataloader(data_name, train_dataset, test_dataset, Paras)
 
     return model, train_loader, test_loader
-
+# <training>
 def train(train_loader, optimizer_name, optimizer, model, loss_fn, Paras):
+    train_time = time.time()
     metrics = ParametersHub.metrics()
     for epoch in range(Paras["epochs"]):
+        epoch_time = time.time()
         for index, (X, Y) in enumerate(train_loader):
             X, Y = X.to(Paras["device"]), Y.to(Paras["device"])
 
+            if epoch == 0 and index == 0:
+                # # compute gradient norm
+                # with torch.no_grad():
+                #     g_k = parameters_to_vector(
+                #         [
+                #             p.grad if p.grad is not None else torch.zeros_like(p)
+                #             for p in model.parameters()
+                #         ]
+                #     )
+                # metrics["grad_norm"].append(torch.norm(g_k, p=2).detach().cpu().item())
+                # print(metrics["grad_norm"][-1])
+
+                # initial training loss
+                initial_time = time.time()
+                initial_loss, initial_correct = Evaluate_Metrics.get_loss_acc(train_loader, model, loss_fn, Paras)
+                metrics["training_loss"].append(initial_loss)
+                metrics["training_acc"].append(initial_correct)
+
+                Print_Info.per_epoch_info(Paras, -1, metrics, time.time() - initial_time)
+
+            # Update the model
             if optimizer_name in ["SGD", "ADAM"]:
                 optimizer.zero_grad()
-                loss = Evaluate_Metrics.
+                loss = Evaluate_Metrics.loss(X, Y, model, loss_fn, Paras)
                 loss.backward()
                 optimizer.step()
 
             elif optimizer_name in [
                 "Bundle",
-                "
+                "SPBM-TR",
+                "SPBM-PF",
+                "ALR-SMAG",
+                "SPSmax",
+                "SPBM-TR-NoneSpecial",
+                "SPBM-TR-NoneLower",
+                "SPBM-TR-NoneCut",
+                "SPBM-PF-NoneCut",
             ]:
                 def closure():
                     optimizer.zero_grad()
-                    loss = Evaluate_Metrics.
+                    loss = Evaluate_Metrics.loss(X, Y, model, loss_fn, Paras)
                     loss.backward()
                     return loss
 
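Usage note: the Bundle/SPBM/SPS branches drive the optimizer through PyTorch's closure protocol, in which `step(closure)` may re-evaluate the loss and gradient several times per update. A self-contained illustration of that protocol with a stock optimizer standing in (LBFGS is used only to show the calling convention; it is not part of junshan_kit):

import torch
import torch.nn as nn

model = nn.Linear(4, 1)
loss_fn = nn.MSELoss()
X, Y = torch.randn(8, 4), torch.randn(8, 1)

optimizer = torch.optim.LBFGS(model.parameters(), lr=0.5)

def closure():
    optimizer.zero_grad()            # clear stale gradients
    loss = loss_fn(model(X), Y)      # forward pass on the current batch
    loss.backward()                  # repopulate gradients
    return loss

loss = optimizer.step(closure)       # the optimizer may call closure() repeatedly
print(loss.item())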
@@ -152,23 +237,88 @@ def train(train_loader, optimizer_name, optimizer, model, loss_fn, Paras):
             else:
                 loss = 0
                 raise NotImplementedError(f"{optimizer_name} is not supported.")
-
-
-
-
-        with torch.no_grad():
-            g_k = parameters_to_vector(
-                [
-                    p.grad if p.grad is not None else torch.zeros_like(p)
-                    for p in model.parameters()
-                ]
-            )
-        metrics["grad_norm"].append(torch.norm(g_k, p=2).detach().cpu().item())
-
-        metrics["per_epoch_loss"].append(loss.item())
-        metrics["epoch_loss"].append(np.mean(metrics["per_epoch_loss"]).item())
+
+
+        # Evaluation
+        training_loss, training_acc = Evaluate_Metrics.get_loss_acc(train_loader, model, loss_fn, Paras)
 
 
-
-
-
+        metrics["training_loss"].append(training_loss)
+        metrics["training_acc"].append(training_acc)
+
+        Print_Info.per_epoch_info(Paras, epoch, metrics, time.time() - epoch_time)
+
+    time_cost = time.time() - train_time
+    metrics["train_time"] = time_cost
+
+    return metrics
+# <training>
+
+def Record_Results(hyperparams,data_name, model_name, optimizer_name, metrics, Paras):
+
+    keys = list(hyperparams.keys())
+    values = list(hyperparams.values())
+
+    param_str = "_".join(f"{k}_{v}" for k, v in zip(keys, values))
+
+    if model_name not in Paras["Results_dict"]:
+        Paras["Results_dict"][model_name] = {}
+
+    if data_name not in Paras["Results_dict"][model_name]:
+        Paras["Results_dict"][model_name][data_name] = {}
+
+
+    if optimizer_name not in Paras["Results_dict"][model_name][data_name]:
+        Paras["Results_dict"][model_name][data_name][optimizer_name] = {}
+
+
+    Paras["Results_dict"][model_name][data_name][optimizer_name][param_str] = {
+        "training_acc": metrics["training_acc"],
+        "training_loss": metrics["training_loss"],
+        "train_time": metrics["train_time"]
+    }
+
+    return Paras
+
+
+def Save_Results(Paras, model_name, data_name, optimizer_name):
+    """
+    Save the result dictionary for a specific (model, dataset, optimizer) combination.
+
+    Parameters
+    ----------
+    Paras : dict or Namespace
+        A container holding all experiment-related information, where:
+        - Paras["Results_folder"] : str
+            Directory to save result files.
+        - Paras["Results_dict"] : dict
+            Nested dictionary storing experiment results.
+
+    model_name : str
+        Full name of the model (e.g., "LeastSquares").
+
+    data_name : str
+        Name of the dataset used in the experiment.
+
+    optimizer_name : str
+        Name of the optimizer for which the results are saved.
+
+    Notes
+    -----
+    The function generates a filename in the format:
+        Results_{model_abbr}_{dataset_abbr}_{optimizer}.pkl
+    and dumps the corresponding result dictionary to disk.
+    """
+
+    # Construct the output file path using model/dataset abbreviations
+    filename = (
+        f'{Paras["Results_folder"]}/'
+        f'Results_{ParametersHub.model_abbr(model_name)}_'
+        f'{data_name}_'
+        f'{optimizer_name}.pkl'
+    )
+
+    # Save the nested results dict to disk
+    with open(filename, "wb") as f:
+        pickle.dump(Paras["Results_dict"][model_name][data_name][optimizer_name], f)
+
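Usage note: `Record_Results` nests results as `Results_dict[model][data][optimizer][param_str]`, and `Save_Results` pickles one (model, dataset, optimizer) leaf per file. A hedged round-trip sketch; the folder, metric values, and hyperparameters are illustrative, and the results directory is assumed to exist:

from junshan_kit import TrainingHub

Paras = {"Results_dict": {}, "Results_folder": "./Results"}  # assumed setup
metrics = {"training_loss": [0.9, 0.5], "training_acc": [0.5, 0.8], "train_time": 12.3}

Paras = TrainingHub.Record_Results(
    {"lr": 0.1}, "MNIST", "LeastSquares", "SPBM-TR", metrics, Paras
)
TrainingHub.Save_Results(Paras, "LeastSquares", "MNIST", "SPBM-TR")
# -> ./Results/Results_<model_abbr>_MNIST_SPBM-TR.pkl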
junshan_kit/kit.py
CHANGED
@@ -4,9 +4,9 @@
 >>> Last Updated : 2025-10-13
 ----------------------------------------------------------------------
 """
-
+import subprocess, smtplib
 import zipfile
-import os, time, openml
+import os, time, openml, pickle
 
 from selenium import webdriver
 from selenium.webdriver.common.by import By
@@ -258,54 +258,43 @@ def download_openml_data(data_name):
     return X, y, categorical_indicator, attribute_names
 
 
-def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    # Append new paths to .gitignore
-    if ignore_list:
-        with open(gitignore_path, "a", encoding="utf-8") as f:
-            for p in ignore_list:
-                f.write(p + "\n")
-        print(f"\n✅ The following paths have been added to .gitignore:\n" + "\n".join(ignore_list))
-    else:
-        print("\nNo folders exceed the size limit (99 MB).")
-
-folder_path = "./exp_data"
-list_and_ignore_large_folders(folder_path, limit_mb=99)
+def read_pkl_data(file_path):
+    """
+    Read data from a pickle file at the specified path
+
+    Args:
+        file_path (str): Path to the pickle file
+
+    Returns:
+        object: Data object loaded from the pickle file
+    """
+    with open(file_path, 'rb') as f:
+        data = pickle.load(f)
+
+    return data
+
+
+def git_commit_push(commit_message, repo_path="."):
+    try:
+        subprocess.run(["git", "-C", repo_path, "add", "."], check=True)
+        subprocess.run(["git", "-C", repo_path, "commit", "-q", "-m", commit_message], check=True)
+        subprocess.run(["git", "-C", repo_path, "push", "-q"], check=True)
+        print("Submitted and pushed successfully!")
+    except subprocess.CalledProcessError as e:
+        print(f"Git Command execution failed: {e}")
+
+
+def seed_meg(meg, Subject, from_email, to_email, from_pwd):
+    from email.mime.text import MIMEText
+    msg = MIMEText(meg)
+    msg["Subject"] = Subject
+    msg["From"] = from_email
+    msg["To"] = to_email
+
+    server = smtplib.SMTP_SSL("smtp.qq.com", 465)
+    server.login(from_email, from_pwd)
+    server.sendmail(from_email, [to_email], msg.as_string())
+    server.quit()
 
 
 
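Usage note: `read_pkl_data` is the reading counterpart of `TrainingHub.Save_Results`, giving a pickle round trip for saved experiment results. A short sketch; the path and the key are illustrative:

from junshan_kit import kit

results = kit.read_pkl_data("./Results/Results_LS_MNIST_SPBM-TR.pkl")  # illustrative path
print(results["lr_0.1"]["train_time"])  # param_str keys hold training_acc/loss/time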
{junshan_kit-2.5.1.dist-info → junshan_kit-2.7.3.dist-info}/METADATA
CHANGED

@@ -1,9 +1,15 @@
 Metadata-Version: 2.4
 Name: junshan_kit
-Version: 2.5.1
+Version: 2.7.3
 Summary: This is an optimization tool.
 Author-email: Junshan Yin <junshanyin@163.com>
+Requires-Dist: cvxpy==1.6.5
 Requires-Dist: kaggle==1.7.4.5
 Requires-Dist: kagglehub==0.3.13
+Requires-Dist: matplotlib==3.10.3
+Requires-Dist: numpy==2.2.6
 Requires-Dist: openml==0.15.1
+Requires-Dist: scikit-learn==1.7.1
 Requires-Dist: selenium==4.36.0
+Requires-Dist: torch==2.6.0
+Requires-Dist: torchvision==0.21.0
junshan_kit-2.7.3.dist-info/RECORD
ADDED

@@ -0,0 +1,20 @@
+junshan_kit/BenchmarkFunctions.py,sha256=tXgZGg-CjTNz78nMyVEQflVFIJDgmmePytXjY_RT9BM,120
+junshan_kit/Check_Info.py,sha256=Z6Ls2S7Fl4h8S9s0NB8jP_YpSLZInvQAeyjIXzq5Bpc,1872
+junshan_kit/DataHub.py,sha256=6RCNr8dBTqK-8ey4m-baMU1qOsJP6swOFkaraGdk0fM,6801
+junshan_kit/DataProcessor.py,sha256=W2bzugcYnwQC403GdvSmGDBhfz8X1KxJBkOAVg1vHHk,14385
+junshan_kit/DataSets.py,sha256=DcpwWRm1_B29hIDjOhvaeKAYYeBknEW2QqsS_qm8Hxs,13367
+junshan_kit/Evaluate_Metrics.py,sha256=PQBGU8fETIvDon1VMdouZ1dhG2n7XHYGbzs2EQUA9FM,3392
+junshan_kit/FiguresHub.py,sha256=116cvRUGUcBqIAs0_xiRzZCzgnPaqmgI5kvNu6cAd_Q,10181
+junshan_kit/ModelsHub.py,sha256=xM6cwLecq9vukrt1c9l7l9dy7mQn3yq0ZwrRg5f_CfM,7995
+junshan_kit/ParametersHub.py,sha256=RSgsSlH0bgehn27lleKfboT1MuLAyIMxZ5FWC-ANbhA,19822
+junshan_kit/Print_Info.py,sha256=uBLpeynOYSZTN8LbJupSH1SuLZ-7cMU3Yp3IlVJWB1s,4772
+junshan_kit/TrainingHub.py,sha256=WV3cUz4JsEdGTpbTqgnU3WmlKeob8RAOuL993EsADj0,11469
+junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+junshan_kit/kit.py,sha256=tQGoJJQZW9BeadX2cuwhvOxX2riHBZG0iFExelS4MIY,11487
+junshan_kit/OptimizerHup/OptimizerFactory.py,sha256=x1_cE5ZSkKffdY0uCIirocBNj2X-u_R-V5jNawJ1EfA,4607
+junshan_kit/OptimizerHup/SPBM.py,sha256=2Yg8Fmc8OkYOrjevD8eAGfI-m-fefoOldybtlp4ZEEs,13730
+junshan_kit/OptimizerHup/SPBM_func.py,sha256=5Fz6eHYIVGMoR_CBDA_Xk_1dnPRq3K16DUNoNaWQ2Ag,17301
+junshan_kit/OptimizerHup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+junshan_kit-2.7.3.dist-info/METADATA,sha256=_mIq2RLQUdQECGnjRK4qABiPr41BGiK-aCkk4EQVKik,455
+junshan_kit-2.7.3.dist-info/WHEEL,sha256=aha0VrrYvgDJ3Xxl3db_g_MDIW-ZexDdrc_m-Hk8YY4,105
+junshan_kit-2.7.3.dist-info/RECORD,,
junshan_kit-2.5.1.dist-info/RECORD
REMOVED

@@ -1,13 +0,0 @@
-junshan_kit/DataHub.py,sha256=D9G2vjCNvDLer4qoKdowgWJChLMQQn7cVhAPZLvRrbE,3332
-junshan_kit/DataProcessor.py,sha256=-6qjG52NDYq746vBPpc0uW2cfbc4syqSWZIzTxJt6fE,11806
-junshan_kit/DataSets.py,sha256=hwGnJsb-Lj90lk6VBwmsDBb3-IA_WgUWzAKayHyq2AI,13391
-junshan_kit/Evaluate_Metrics.py,sha256=Ic3VejsKtGT23ac7QKjRZ3WAETO1KP6JR-EaeiwblJE,1266
-junshan_kit/ModelsHub.py,sha256=z9NyC4PTxo3wCxa2XxOfcjrw9NcDs0LCjBGCp6Z-90s,7084
-junshan_kit/ParametersHub.py,sha256=usM2vu7fBP0n97rNEeJMxhzxRRGHhJMjELrnyJiVvTk,11520
-junshan_kit/Print_Info.py,sha256=yiGc6Qlprj0ds6w2DP7ScAgTBZwswxXqxuIrQ3_liL8,3111
-junshan_kit/TrainingHub.py,sha256=QOQ5BDctGysMbbSOEy6gR-ng0bSmrZl4iJZmj6n52m0,5960
-junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-junshan_kit/kit.py,sha256=F9f5qqn9ve-UVoYtXlFmNGl4YJ3eEy6T1yRrC0s-Wpw,12367
-junshan_kit-2.5.1.dist-info/METADATA,sha256=_gNNCaPWuspBXCD0Ce0maEYKtbO8eaoDXQIhmK2osOI,267
-junshan_kit-2.5.1.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
-junshan_kit-2.5.1.dist-info/RECORD,,