junshan-kit 2.4.8__py2.py3-none-any.whl → 2.4.9__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of junshan-kit may warrant closer review.
- junshan_kit/DataHub.py +114 -0
- junshan_kit/DataProcessor.py +114 -24
- junshan_kit/DataSets.py +186 -37
- junshan_kit/{Models.py → ModelsHub.py} +5 -0
- junshan_kit/ParametersHub.py +404 -0
- junshan_kit/Print_Info.py +6 -2
- junshan_kit/TrainingHub.py +75 -0
- junshan_kit/kit.py +94 -23
- {junshan_kit-2.4.8.dist-info → junshan_kit-2.4.9.dist-info}/METADATA +2 -2
- junshan_kit-2.4.9.dist-info/RECORD +12 -0
- junshan_kit/ComOptimizers.py +0 -126
- junshan_kit/ExperimentHub.py +0 -338
- junshan_kit/SPBM.py +0 -350
- junshan_kit/SPBM_func.py +0 -601
- junshan_kit/TrainingParas.py +0 -470
- junshan_kit/check_args.py +0 -116
- junshan_kit/datahub.py +0 -281
- junshan_kit-2.4.8.dist-info/RECORD +0 -16
- {junshan_kit-2.4.8.dist-info → junshan_kit-2.4.9.dist-info}/WHEEL +0 -0
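Note on the Models.py → ModelsHub.py rename in the list above: downstream code that imports the old module name will break on upgrade. A minimal compatibility shim, assuming nothing beyond the rename itself (this shim is an illustration, not part of the package):

    # Works against both 2.4.8 (Models) and 2.4.9 (ModelsHub); the alias
    # keeps existing call sites unchanged.
    try:
        from junshan_kit import ModelsHub as Models  # 2.4.9 and later
    except ImportError:
        from junshan_kit import Models               # 2.4.8 and earlier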
junshan_kit/ExperimentHub.py
DELETED
@@ -1,338 +0,0 @@
-import sys, os, torch, random
-import numpy as np
-import torch.nn as nn
-import torch.utils.data as Data
-from torch.utils.data import Subset, random_split
-from junshan_kit import ComOptimizers, datahub, Models, TrainingParas, SPBM
-
-# -------------------------------------
-def set_seed(seed=42):
-    torch.manual_seed(seed)
-    torch.cuda.manual_seed_all(seed)
-    np.random.seed(seed)
-    random.seed(seed)
-    torch.backends.cudnn.deterministic = True
-    torch.backends.cudnn.benchmark = False
-
-def device(Paras):
-    device = torch.device(f"{Paras['cuda']}" if torch.cuda.is_available() else "cpu")
-    Paras["device"] = device
-    use_color = sys.stdout.isatty()
-    Paras["use_color"] = use_color
-
-    return Paras
-
-# -------------------------------------
-class Train_Steps:
-    def __init__(self, args) -> None:
-        self.args = args
-
-    def _model_map(self, model_name):
-        model_mapping = self.args.model_mapping
-
-        return model_mapping[model_name]
-
-    def get_train_group(self):
-        training_group = []
-        for cfg in self.args.train_group:
-            model, dataset, optimizer = cfg.split("-")
-            training_group.append((self._model_map(model), dataset, optimizer))
-
-        return training_group
-
-    def set_paras(self, results_folder_name, py_name, time_str, OtherParas):
-        Paras = {
-            # Name of the folder where results will be saved.
-            "results_folder_name": results_folder_name,
-            # Whether to draw loss/accuracy figures.
-            "DrawFigs": "ON",
-            # Whether to use log scale when drawing plots.
-            "use_log_scale": "ON",
-            # Print loss every N epochs.
-            "epoch_log_interval": 1,
-            # Timestamp string for result saving.
-            "time_str": time_str,
-            # Random seed
-            "seed": OtherParas['seed'],
-            # Device used for training.
-            "cuda": f"cuda:{self.args.cuda}",
-
-            # batch-size
-            "batch_size": self.args.bs,
-
-            # epochs
-            "epochs": self.args.e,
-
-            # split_train_data
-            "split_train_data": self.args.s,
-
-            # select_subset
-            "select_subset": self.args.subset,
-
-            # subset_number_dict
-            "subset_number_dict": TrainingParas.subset_number_dict(OtherParas),
-
-            # validation
-            "validation": TrainingParas.validation(),
-
-            # validation_rate
-            "validation_rate": TrainingParas.validation_rate(),
-
-            # model list
-            "model_list" : TrainingParas.model_list(),
-
-            # model_type
-            "model_type": TrainingParas.model_type(),
-
-            # data_list
-            "data_list": TrainingParas.data_list(),
-
-            # optimizer_dict
-            "optimizer_dict": TrainingParas.optimizer_dict(OtherParas)
-        }
-        Paras["py_name"] = py_name
-
-        return Paras
-
-    # <Step_3> : Chosen_loss
-    def chosen_loss(self, model_name, Paras):
-        # ---------------------------------------------------
-        # There have an addition parameter
-        if model_name == "LogRegressionBinaryL2":
-            Paras["lambda"] = 1e-3
-        # ---------------------------------------------------
-
-        if model_name in ["LeastSquares"]:
-            loss_fn = nn.MSELoss()
-
-        else:
-            if Paras["model_type"][model_name] == "binary":
-                loss_fn = nn.BCEWithLogitsLoss()
-
-            elif Paras["model_type"][model_name] == "multi":
-                loss_fn = nn.CrossEntropyLoss()
-
-            else:
-                loss_fn = nn.MSELoss()
-                print("\033[91m The loss function is error!\033[0m")
-                assert False
-        Paras["loss_fn"] = loss_fn
-
-        return loss_fn, Paras
-
-    # <Step_4> : import data --> step.py
-    def load_data(self, model_name, data_name, Paras):
-        # load data
-        train_path = f"./exp_data/{data_name}/training_data"
-        test_path = f"./exp_data/{data_name}/test_data"
-        # Paras["train_ratio"] = 1.0
-        # Paras["select_subset"].setdefault(data_name, False)
-        # Paras["validation"].setdefault(data_name, False)
-
-        if data_name == "MNIST":
-            train_dataset, test_dataset, transform = datahub.MNIST(Paras, model_name)
-
-        elif data_name == "CIFAR100":
-            train_dataset, test_dataset, transform = datahub.CIFAR100(Paras, model_name)
-
-        elif data_name == "CALTECH101_Resize_32":
-            Paras["train_ratio"] = 0.7
-            train_dataset, test_dataset, transform = datahub.caltech101_Resize_32(
-                Paras["seed"], Paras["train_ratio"], split=True
-            )
-
-        elif data_name in ["Vowel", "Letter", "Shuttle", "w8a"]:
-            Paras["train_ratio"] = Paras["split_train_data"][data_name]
-            train_dataset, test_dataset, transform = datahub.get_libsvm_data(
-                train_path + ".txt", test_path + ".txt", data_name
-            )
-
-        elif data_name in ["RCV1", "Duke", "Ijcnn"]:
-            Paras["train_ratio"] = Paras["split_train_data"][data_name]
-            train_dataset, test_dataset, transform = datahub.get_libsvm_bz2_data(
-                train_path + ".bz2", test_path + ".bz2", data_name, Paras
-            )
-
-        else:
-            transform = None
-            print(f"The data_name is error!")
-            assert False
-
-        return train_dataset, test_dataset, transform
-    # <Step_4>
-
-    # <subset> : Step 5.1 -->step.py
-    def set_subset(self, data_name, Paras, train_dataset, test_dataset):
-        if self.args.subset[0]>1:
-            train_num = self.args.subset[0]
-            test_num = self.args.subset[1]
-            train_subset_num = min(train_num, len(train_dataset))
-            test_subset_num = min(test_num, len(test_dataset))
-
-            train_subset_indices = list(range(int(train_subset_num)))
-            train_dataset = Subset(train_dataset, train_subset_indices)
-
-            test_subset_indices = list(range(int(test_subset_num)))
-            test_dataset = Subset(test_dataset, test_subset_indices)
-
-        else:
-            train_ratios= self.args.subset[0]
-            test_ratios= self.args.subset[1]
-
-            train_subset_indices = list(range(int(train_ratios * len(train_dataset))))
-            train_dataset = Subset(train_dataset, train_subset_indices)
-
-            test_subset_indices = list(range(int(test_ratios * len(test_dataset))))
-            test_dataset = Subset(test_dataset, test_subset_indices)
-
-        return train_dataset, test_dataset
-
-    # <validation> : Step 5.2 --> step.py
-    def set_val_set(self, data_name, train_dataset, Paras):
-        if Paras["validation"][data_name]:
-            size_ = len(train_dataset)
-            val_size = int(size_ * Paras["validation_rate"][data_name])
-            train_size = size_ - val_size
-
-            train_dataset, val_dataset = random_split(
-                train_dataset,
-                [train_size, val_size],
-                generator=torch.Generator().manual_seed(Paras["seed"]),
-            )
-
-        else:
-            val_dataset = Subset(train_dataset, [])
-
-        return train_dataset, val_dataset
-    # <validation>
-
-
-    # <get_dataloader> Step 5.3 -->step.py
-    def get_dataloader(self, data_name, train_dataset, test_dataset, Paras):
-        set_seed(Paras["seed"])
-        g = torch.Generator()
-        g.manual_seed(Paras["seed"])
-
-        # Create training DataLoader
-
-        train_loader = Data.DataLoader(
-            dataset=train_dataset,
-            shuffle=True,
-            batch_size=self.args.bs,
-            generator=g,
-            num_workers=4,
-        )
-
-        # test loader
-        test_loader = Data.DataLoader(
-            dataset=test_dataset,
-            shuffle=False,
-            batch_size=self.args.bs,
-            generator=g,
-            num_workers=4,
-        )
-
-        return train_loader, test_loader
-    # <get_dataloader>
-
-    def hyperparas_and_path(
-        self,
-        model_name,
-        data_name,
-        optimizer_name,
-        Paras,
-    ):
-        params_gird = Paras["optimizer_dict"][optimizer_name]["params"]
-        keys, values = list(params_gird.keys()), list(params_gird.values())
-
-        # Set the path for saving results
-        folder_path = f'./{Paras["results_folder_name"]}/seed_{Paras["seed"]}/{model_name}/{data_name}/{optimizer_name}/train_{Paras["train_data_num"]}_test_{Paras["test_data_num"]}/Batch_size_{self.args.bs}/epoch_{self.args.e}/{Paras["time_str"]}'
-        os.makedirs(folder_path, exist_ok=True)
-
-        return keys, values, folder_path
-
-
-    # <Reloading> Step 7.3 --> step.py
-    def reloading_model_dataloader(self,
-        base_model_fn,
-        initial_state_dict,
-        data_name,
-        train_dataset,
-        test_dataset,
-        Paras,
-    ):
-        set_seed(Paras["seed"])
-        model = base_model_fn()
-        model.load_state_dict(initial_state_dict)
-        model.to(Paras["device"])
-        train_loader, test_loader = self.get_dataloader(
-            data_name, train_dataset, test_dataset, Paras
-        )
-
-        return model, train_loader, test_loader
-    # <Reloading>
-
-    def chosen_optimizer(self, optimizer_name, model, hyperparams, Paras):
-        if optimizer_name == "SGD":
-            optimizer = torch.optim.SGD(model.parameters(), lr=hyperparams["alpha"])
-
-        elif optimizer_name == "ADAM":
-            optimizer = torch.optim.Adam(
-                model.parameters(),
-                lr=hyperparams["alpha"],
-                betas=(hyperparams["beta1"], hyperparams["beta2"]),
-                eps=hyperparams["epsilon"],
-            )
-
-        elif optimizer_name in ["SPBM-TR"]:
-            optimizer = SPBM.TR(model.parameters(), model, hyperparams, Paras)
-
-        elif optimizer_name in ["SPBM-TR-NoneLower"]:
-            optimizer = SPBM.TR_NoneLower(model.parameters(), model, hyperparams, Paras)
-
-        elif optimizer_name in ["SPBM-TR-NoneSpecial"]:
-            optimizer = SPBM.TR_NoneSpecial(model.parameters(), model, hyperparams, Paras)
-
-        elif optimizer_name in ["SPBM-TR-NoneCut"]:
-            optimizer = SPBM.TR_NoneCut(model.parameters(), model, hyperparams, Paras)
-
-        elif optimizer_name in ["SPBM-PF-NoneLower"]:
-            optimizer = SPBM.PF_NoneLower(model.parameters(), model, hyperparams, Paras)
-
-        elif optimizer_name in ["SPBM-PF"]:
-            optimizer = SPBM.PF(model.parameters(), model, hyperparams, Paras)
-
-        elif optimizer_name in ["SPBM-PF-NoneCut"]:
-            optimizer = SPBM.PF_NoneCut(model.parameters(), model, hyperparams, Paras)
-
-        elif optimizer_name in ["SPSmax"]:
-            optimizer = ComOptimizers.SPSmax(
-                model.parameters(), model, hyperparams, Paras
-            )
-
-        elif optimizer_name in ["ALR-SMAG"]:
-            optimizer = ComOptimizers.ALR_SMAG(
-                model.parameters(), model, hyperparams, Paras
-            )
-
-        elif optimizer_name in ["Bundle"]:
-            optimizer = ComOptimizers.Bundle(
-                model.parameters(), model, hyperparams, Paras
-            )
-
-        else:
-            raise NotImplementedError(f"{optimizer_name} is not supported.")
-
-        return optimizer
-
-    def train(self, train_loader, optimizer, optimizer_name, Paras):
-        for epoch in range(self.args.e):
-
-            for index, (X, Y) in enumerate(train_loader):
-                X, Y = X.to(Paras["device"]), Y.to(Paras["device"])
-
-                if optimizer_name in ["SGD", "ADAM"]:
-                    print(1)
-
-
-
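The deleted Train_Steps class above chained its helpers in a fixed order: build the Paras dict, pick a loss, load and optionally subsample the data, carve out a validation split, build loaders, then pick an optimizer. A rough sketch of that flow against the 2.4.8 API, assuming an argparse-style args namespace carrying the fields the class reads (cuda, bs, e, s, subset, train_group, model_mapping) and a hypothetical OtherParas dict; the stand-in nn.Linear model, the timestamp string, and the example hyperparameters are illustrative only:

    import torch
    from junshan_kit.ExperimentHub import Train_Steps, device

    steps = Train_Steps(args)                       # args: assumed namespace, see above
    Paras = steps.set_paras("results", "main.py", "2024-01-01_0000", OtherParas)
    Paras = device(Paras)                           # records cuda:N or cpu in Paras["device"]

    model_name, data_name, opt_name = "LeastSquares", "MNIST", "SGD"
    loss_fn, Paras = steps.chosen_loss(model_name, Paras)   # MSELoss for LeastSquares
    train_ds, test_ds, _ = steps.load_data(model_name, data_name, Paras)
    train_ds, test_ds = steps.set_subset(data_name, Paras, train_ds, test_ds)
    train_ds, val_ds = steps.set_val_set(data_name, train_ds, Paras)
    train_loader, test_loader = steps.get_dataloader(data_name, train_ds, test_ds, Paras)

    # Stand-in model; the real one came from args.model_mapping.
    model = torch.nn.Linear(784, 1).to(Paras["device"])
    optimizer = steps.chosen_optimizer(opt_name, model, {"alpha": 1e-2}, Paras)  # SGD reads only "alpha"

In 2.4.9 this machinery appears to have been reorganized into ParametersHub.py and TrainingHub.py (see the file list above), so the sketch applies to 2.4.8 only.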
junshan_kit/SPBM.py
DELETED
@@ -1,350 +0,0 @@
-from junshan_kit import SPBM, SPBM_func
-import torch, time, os
-from torch.optim.optimizer import Optimizer
-from torch.nn.utils import parameters_to_vector, vector_to_parameters
-
-
-class PF(Optimizer):
-    def __init__(self, params, model, hyperparams, Paras):
-        defaults = dict()
-        super().__init__(params, defaults)
-        self.model = model
-        self.cutting_num = hyperparams['cutting_number']
-        self.M = hyperparams['M']
-        self.delta = hyperparams['delta']
-        self.Paras = Paras
-
-        self.x_his, self.g_his, self.f_his = [], [], []
-
-    def step(self, closure=None):
-        if closure is None:
-            raise RuntimeError("Closure required for CuttingPlaneOptimizer")
-
-        # Zero the gradients and run the forward pass
-        loss = closure()
-
-        with torch.no_grad():
-            xk = parameters_to_vector(self.model.parameters())
-            # print(torch.norm(xk))
-            g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
-
-            # Add cutting plane
-            x_his, f_his, g_his = SPBM_func.add_cutting(self.x_his, self.f_his, self.g_his, xk.detach().clone(), g_k.detach().clone(), loss.detach().clone(), self.cutting_num)
-
-            ## Cut selection
-            selected_x, selected_f, selected_g = SPBM_func.cut_selection(x_his, f_his, g_his, self.M)
-
-            # the coefficient of dual problem
-            Gk, rk, ek = SPBM_func.get_var(selected_x, selected_f, selected_g, self.delta)
-
-            # SOVER (dual)
-            xk = SPBM_func.subproblem_pf(Gk, ek, xk, self.delta, self.Paras)
-
-            # print(len(self.f_his))
-            vector_to_parameters(xk, self.model.parameters())
-
-
-        # For now, return loss (tensor type)
-        return loss
-
-# <SPBM-TR>
-class TR(Optimizer):
-    def __init__(self, params, model, hyperparams, Paras):
-        defaults = dict()
-        super().__init__(params, defaults)
-        self.model = model
-        self.cutting_num = hyperparams['cutting_number']
-        self.M = hyperparams['M']
-        self.delta = hyperparams['delta']
-        self.Paras = Paras
-
-        self.x_his, self.g_his, self.f_his = [], [], []
-
-    def step(self, closure=None):
-        if closure is None:
-            raise RuntimeError("Closure required for CuttingPlaneOptimizer")
-
-        # Reset the gradient and perform forward computation
-        loss = closure()
-
-        with torch.no_grad():
-            xk = parameters_to_vector(self.model.parameters())
-            # print(torch.norm(xk))
-            g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
-
-            # Add cutting plane
-            x_his, f_his, g_his = SPBM_func.add_cutting(self.x_his, self.f_his, self.g_his, xk.detach().clone(), g_k.detach().clone(), loss.detach().clone(), self.cutting_num)
-
-            ## Cut selection
-            selected_x, selected_f, selected_g = SPBM_func.cut_selection(x_his, f_his, g_his, self.M)
-
-            # the coefficient of dual problem
-            Gk, rk, ek = SPBM_func.get_var(selected_x, selected_f, selected_g, self.delta)
-
-            # SOVER (dual)
-            xk = SPBM_func.subproblem_tr_2(Gk, ek, xk, rk, self.Paras)
-
-            # print(len(self.f_his))
-            vector_to_parameters(xk, self.model.parameters())
-
-        # tensor type
-        return loss
-# <SPBM-TR>
-
-# <SPBM-TR_NoneSpecial>
-class TR_NoneSpecial(Optimizer):
-    def __init__(self, params, model, hyperparams, Paras):
-        defaults = dict()
-        super().__init__(params, defaults)
-        self.model = model
-        self.cutting_num = hyperparams['cutting_number']
-        self.M = hyperparams['M']
-        self.delta = hyperparams['delta']
-        self.Paras = Paras
-
-        self.x_his, self.g_his, self.f_his = [], [], []
-
-    def step(self, closure=None):
-        if closure is None:
-            raise RuntimeError("Closure required for CuttingPlaneOptimizer")
-
-        # Reset the gradient and perform forward computation
-        loss = closure()
-
-        with torch.no_grad():
-            xk = parameters_to_vector(self.model.parameters())
-            # print(torch.norm(xk))
-            g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
-
-            # Add cutting plane
-            x_his, f_his, g_his = SPBM_func.add_cutting(self.x_his, self.f_his, self.g_his, xk.detach().clone(), g_k.detach().clone(), loss.detach().clone(), self.cutting_num)
-
-            ## Cut selection
-            selected_x, selected_f, selected_g = SPBM_func.cut_selection(x_his, f_his, g_his, self.M)
-
-            # the coefficient of dual problem
-            Gk, rk, ek = SPBM_func.get_var(selected_x, selected_f, selected_g, self.delta)
-
-            # SOVER (dual)
-            xk = SPBM_func.subproblem_tr_NoneSpecial(Gk, ek, xk, rk, self.Paras)
-
-            # print(len(self.f_his))
-            vector_to_parameters(xk, self.model.parameters())
-
-        # tensor type
-        return loss
-# <SPBM-TR_NoneSpecial>
-
-class TR_primal(Optimizer):
-    def __init__(self, params, model, hyperparams, Paras):
-        defaults = dict()
-        super().__init__(params, defaults)
-        self.model = model
-        self.cutting_num = hyperparams['cutting_number']
-        self.M = hyperparams['M']
-        self.delta = hyperparams['delta']
-        self.Paras = Paras
-
-        self.x_his, self.g_his, self.f_his = [], [], []
-
-    def step(self, closure=None):
-        if closure is None:
-            raise RuntimeError("Closure required for CuttingPlaneOptimizer")
-
-        # Reset the gradient and perform forward computation
-        loss = closure()
-
-        with torch.no_grad():
-            xk = parameters_to_vector(self.model.parameters())
-            # print(torch.norm(xk))
-            g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
-
-            # Add cutting plane
-            x_his, f_his, g_his = SPBM_func.add_cutting(self.x_his, self.f_his, self.g_his, xk.detach().clone(), g_k.detach().clone(), loss.detach().clone(), self.cutting_num)
-
-            ## Cut selection
-            selected_x, selected_f, selected_g = SPBM_func.cut_selection(x_his, f_his, g_his, self.M)
-
-            # the coefficient of dual problem
-            Gk, rk, ek = SPBM_func.get_var(selected_x, selected_f, selected_g, self.delta)
-
-            # SOVER (dual)
-            xk = SPBM_func.subproblem_tr_primal(Gk, ek, xk, rk, self.Paras)
-
-            # print(len(self.f_his))
-            vector_to_parameters(xk, self.model.parameters())
-
-        # tensor type
-        return loss
-
-
-class TR_NoneLower(Optimizer):
-    def __init__(self, params, model, hyperparams, Paras):
-        defaults = dict()
-        super().__init__(params, defaults)
-        self.model = model
-        self.cutting_num = hyperparams['cutting_number']
-        self.M = hyperparams['M']
-        self.delta = hyperparams['delta']
-        self.Paras = Paras
-
-        self.x_his, self.g_his, self.f_his = [], [], []
-
-    def step(self, closure=None):
-        if closure is None:
-            raise RuntimeError("Closure required for CuttingPlaneOptimizer")
-
-        # Reset the gradient and perform forward computation
-        loss = closure()
-
-        with torch.no_grad():
-            xk = parameters_to_vector(self.model.parameters())
-            # print(torch.norm(xk))
-            g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
-
-            # Add cutting plane
-            x_his, f_his, g_his = SPBM_func.add_cutting(self.x_his, self.f_his, self.g_his, xk.detach().clone(), g_k.detach().clone(), loss.detach().clone(), self.cutting_num)
-
-            ## Cut selection
-            selected_x, selected_f, selected_g = SPBM_func.cut_selection(x_his, f_his, g_his, self.M)
-
-            # the coefficient of dual problem
-            Gk, rk, ek = SPBM_func.get_var(selected_x, selected_f, selected_g, self.delta)
-
-            # SOVER (dual)
-            xk = SPBM_func.subproblem_tr_NoneLower(Gk, ek, xk, rk, self.Paras)
-
-            # print(len(self.f_his))
-            vector_to_parameters(xk, self.model.parameters())
-
-        # tensor type
-        return loss
-
-class TR_NoneCut(Optimizer):
-    def __init__(self, params, model, hyperparams, Paras):
-        defaults = dict()
-        super().__init__(params, defaults)
-        self.model = model
-        self.cutting_num = hyperparams['cutting_number']
-        self.M = hyperparams['M']
-        self.delta = hyperparams['delta']
-        self.Paras = Paras
-
-        self.x_his, self.g_his, self.f_his = [], [], []
-
-    def step(self, closure=None):
-        if closure is None:
-            raise RuntimeError("Closure required for CuttingPlaneOptimizer")
-
-        # Reset the gradient and perform forward computation
-        loss = closure()
-
-        with torch.no_grad():
-            xk = parameters_to_vector(self.model.parameters())
-            # print(torch.norm(xk))
-            g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
-
-            # Add cutting plane
-            x_his, f_his, g_his = SPBM_func.add_cutting(self.x_his, self.f_his, self.g_his, xk.detach().clone(), g_k.detach().clone(), loss.detach().clone(), self.cutting_num)
-
-            # ## Cut selection
-            # selected_x, selected_f, selected_g = SPBM_func.cut_selection(x_his, f_his, g_his, self.M)
-
-            # the coefficient of dual problem
-            Gk, rk, ek = SPBM_func.get_var(x_his, f_his, g_his, self.delta)
-
-            # SOVER (dual)
-            xk = SPBM_func.subproblem_tr_NoneLower(Gk, ek, xk, rk, self.Paras)
-
-            # print(len(self.f_his))
-            vector_to_parameters(xk, self.model.parameters())
-
-        # tensor type
-        return loss
-
-# ************************** SPBM-PF **************************
-class PF_NoneLower(Optimizer):
-    def __init__(self, params, model, hyperparams, Paras):
-        defaults = dict()
-        super().__init__(params, defaults)
-        self.model = model
-        self.cutting_num = hyperparams['cutting_number']
-        self.M = hyperparams['M']
-        self.delta = hyperparams['delta']
-        self.Paras = Paras
-
-        self.x_his, self.g_his, self.f_his = [], [], []
-
-    def step(self, closure=None):
-        if closure is None:
-            raise RuntimeError("Closure required for CuttingPlaneOptimizer")
-
-        # Reset the gradient and perform forward computation
-        loss = closure()
-
-        with torch.no_grad():
-            xk = parameters_to_vector(self.model.parameters())
-            # print(torch.norm(xk))
-            g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
-
-            # Add cutting plane
-            x_his, f_his, g_his = SPBM_func.add_cutting(self.x_his, self.f_his, self.g_his, xk.detach().clone(), g_k.detach().clone(), loss.detach().clone(), self.cutting_num)
-
-            ## Cut selection
-            selected_x, selected_f, selected_g = SPBM_func.cut_selection(x_his, f_his, g_his, self.M)
-
-            # the coefficient of dual problem
-            Gk, rk, ek = SPBM_func.get_var(selected_x, selected_f, selected_g, self.delta)
-
-            # SOVER (dual)
-            xk = SPBM_func.subproblem_pf_NoneLower(Gk, ek, xk, self.delta, self.Paras)
-
-            # print(len(self.f_his))
-            vector_to_parameters(xk, self.model.parameters())
-
-        # tensor type
-        return loss
-
-
-class PF_NoneCut(Optimizer):
-    def __init__(self, params, model, hyperparams, Paras):
-        defaults = dict()
-        super().__init__(params, defaults)
-        self.model = model
-        self.cutting_num = hyperparams['cutting_number']
-        self.M = hyperparams['M']
-        self.delta = hyperparams['delta']
-        self.Paras = Paras
-
-        self.x_his, self.g_his, self.f_his = [], [], []
-
-    def step(self, closure=None):
-        if closure is None:
-            raise RuntimeError("Closure required for CuttingPlaneOptimizer")
-
-        # Reset the gradient and perform forward computation
-        loss = closure()
-
-        with torch.no_grad():
-            xk = parameters_to_vector(self.model.parameters())
-            # print(torch.norm(xk))
-            g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
-
-            # Add cutting plane
-            x_his, f_his, g_his = SPBM_func.add_cutting(self.x_his, self.f_his, self.g_his, xk.detach().clone(), g_k.detach().clone(), loss.detach().clone(), self.cutting_num)
-
-            # ## Cut selection
-            # selected_x, selected_f, selected_g = SPBM_func.cut_selection(x_his, f_his, g_his, self.M)
-
-            # the coefficient of dual problem
-            Gk, rk, ek = SPBM_func.get_var(x_his, f_his, g_his, self.delta)
-
-            # SOVER (dual)
-            xk = SPBM_func.subproblem_pf_NoneLower(Gk, ek, xk, self.delta, self.Paras)
-
-            # print(len(self.f_his))
-            vector_to_parameters(xk, self.model.parameters())
-
-        # tensor type
-        return loss
-
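All of the deleted SPBM optimizers share one contract: step() refuses to run without a closure, because each bundle update needs a fresh loss value and gradient at the current iterate before it adds a cutting plane and solves the dual subproblem. A minimal sketch of that calling pattern against 2.4.8, with hypothetical hyperparameter values (the diff only shows which keys are read — cutting_number, M, delta — not sensible magnitudes) and a Paras dict that in real use likely carried more keys consumed by SPBM_func:

    import torch
    import torch.nn as nn
    from junshan_kit import SPBM

    model = nn.Linear(10, 1)
    loss_fn = nn.MSELoss()
    X, Y = torch.randn(32, 10), torch.randn(32, 1)

    hyperparams = {"cutting_number": 10, "M": 5, "delta": 1.0}  # illustrative values
    Paras = {"device": torch.device("cpu")}                     # probably incomplete

    opt = SPBM.TR(model.parameters(), model, hyperparams, Paras)

    def closure():
        opt.zero_grad()               # clear stale gradients
        loss = loss_fn(model(X), Y)   # forward pass
        loss.backward()               # step() reads p.grad afterwards
        return loss

    loss = opt.step(closure)          # raises RuntimeError if closure is None

The same pattern should apply to the PF variants, which differ only in the subproblem they solve after cut selection.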