junshan-kit 2.4.0__py2.py3-none-any.whl → 2.4.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- junshan_kit/{Optimizers.py → ComOptimizers.py} +42 -1
- junshan_kit/DataProcessor.py +14 -10
- junshan_kit/DataSets.py +70 -220
- junshan_kit/ExperimentHub.py +141 -7
- junshan_kit/Print_Info.py +3 -3
- junshan_kit/SPBM.py +350 -0
- junshan_kit/SPBM_func.py +601 -0
- junshan_kit/check_args.py +1 -1
- {junshan_kit-2.4.0.dist-info → junshan_kit-2.4.1.dist-info}/METADATA +1 -1
- junshan_kit-2.4.1.dist-info/RECORD +16 -0
- junshan_kit-2.4.0.dist-info/RECORD +0 -14
- {junshan_kit-2.4.0.dist-info → junshan_kit-2.4.1.dist-info}/WHEEL +0 -0
junshan_kit/ExperimentHub.py
CHANGED
@@ -1,13 +1,9 @@
 import sys, os, torch, random
 import numpy as np
 import torch.nn as nn
-
-
-
-# script_dir = os.path.dirname(os.path.abspath(__file__))
-# sys.path.append(os.path.join(script_dir, 'src'))
-
-from junshan_kit import datahub, Models, TrainingParas
+import torch.utils.data as Data
+from torch.utils.data import Subset, random_split
+from junshan_kit import ComOptimizers, datahub, Models, TrainingParas, SPBM
 
 # -------------------------------------
 def set_seed(seed=42):
@@ -191,4 +187,142 @@ class Train_Steps:
 
         return train_dataset, test_dataset
 
+    # <validation> : Step 5.2 --> step.py
+    def set_val_set(self, data_name, train_dataset, Paras):
+        if Paras["validation"][data_name]:
+            size_ = len(train_dataset)
+            val_size = int(size_ * Paras["validation_rate"][data_name])
+            train_size = size_ - val_size
+
+            train_dataset, val_dataset = random_split(
+                train_dataset,
+                [train_size, val_size],
+                generator=torch.Generator().manual_seed(Paras["seed"]),
+            )
+
+        else:
+            val_dataset = Subset(train_dataset, [])
+
+        return train_dataset, val_dataset
+    # <validation>
+
+
+    # <get_dataloader> Step 5.3 --> step.py
+    def get_dataloader(self, data_name, train_dataset, test_dataset, Paras):
+        set_seed(Paras["seed"])
+        g = torch.Generator()
+        g.manual_seed(Paras["seed"])
+
+        # Create training DataLoader
+
+        train_loader = Data.DataLoader(
+            dataset=train_dataset,
+            shuffle=True,
+            batch_size=self.args.bs,
+            generator=g,
+            num_workers=4,
+        )
+
+        # Create test DataLoader
+        test_loader = Data.DataLoader(
+            dataset=test_dataset,
+            shuffle=False,
+            batch_size=self.args.bs,
+            generator=g,
+            num_workers=4,
+        )
+
+        return train_loader, test_loader
+    # <get_dataloader>
+
+    def hyperparas_and_path(
+        self,
+        model_name,
+        data_name,
+        optimizer_name,
+        Paras,
+    ):
+        params_grid = Paras["optimizer_dict"][optimizer_name]["params"]
+        keys, values = list(params_grid.keys()), list(params_grid.values())
+
+        # Set the path for saving results
+        folder_path = f'./{Paras["results_folder_name"]}/seed_{Paras["seed"]}/{model_name}/{data_name}/{optimizer_name}/train_{Paras["train_data_num"]}_test_{Paras["test_data_num"]}/Batch_size_{self.args.bs}/epoch_{self.args.e}/{Paras["time_str"]}'
+        os.makedirs(folder_path, exist_ok=True)
+
+        return keys, values, folder_path
+
+
+    # <Reloading> Step 7.3 --> step.py
+    def reloading_model_dataloader(self,
+        base_model_fn,
+        initial_state_dict,
+        data_name,
+        train_dataset,
+        test_dataset,
+        Paras,
+    ):
+        set_seed(Paras["seed"])
+        model = base_model_fn()
+        model.load_state_dict(initial_state_dict)
+        model.to(Paras["device"])
+        train_loader, test_loader = self.get_dataloader(
+            data_name, train_dataset, test_dataset, Paras
+        )
+
+        return model, train_loader, test_loader
+    # <Reloading>
+
+    def chosen_optimizer(self, optimizer_name, model, hyperparams, Paras):
+        if optimizer_name == "SGD":
+            optimizer = torch.optim.SGD(model.parameters(), lr=hyperparams["alpha"])
+
+        elif optimizer_name == "ADAM":
+            optimizer = torch.optim.Adam(
+                model.parameters(),
+                lr=hyperparams["alpha"],
+                betas=(hyperparams["beta1"], hyperparams["beta2"]),
+                eps=hyperparams["epsilon"],
+            )
+
+        elif optimizer_name == "SPBM-TR":
+            optimizer = SPBM.TR(model.parameters(), model, hyperparams, Paras)
+
+        elif optimizer_name == "SPBM-TR-NoneLower":
+            optimizer = SPBM.TR_NoneLower(model.parameters(), model, hyperparams, Paras)
+
+        elif optimizer_name == "SPBM-TR-NoneSpecial":
+            optimizer = SPBM.TR_NoneSpecial(model.parameters(), model, hyperparams, Paras)
+
+        elif optimizer_name == "SPBM-TR-NoneCut":
+            optimizer = SPBM.TR_NoneCut(model.parameters(), model, hyperparams, Paras)
+
+        elif optimizer_name == "SPBM-PF-NoneLower":
+            optimizer = SPBM.PF_NoneLower(model.parameters(), model, hyperparams, Paras)
+
+        elif optimizer_name == "SPBM-PF":
+            optimizer = SPBM.PF(model.parameters(), model, hyperparams, Paras)
+
+        elif optimizer_name == "SPBM-PF-NoneCut":
+            optimizer = SPBM.PF_NoneCut(model.parameters(), model, hyperparams, Paras)
+
+        elif optimizer_name == "SPSmax":
+            optimizer = ComOptimizers.SPSmax(
+                model.parameters(), model, hyperparams, Paras
+            )
 
+        elif optimizer_name == "ALR-SMAG":
+            optimizer = ComOptimizers.ALR_SMAG(
+                model.parameters(), model, hyperparams, Paras
+            )
+
+        elif optimizer_name == "Bundle":
+            optimizer = ComOptimizers.Bundle(
+                model.parameters(), model, hyperparams, Paras
+            )
+
+        else:
+            raise NotImplementedError(f"{optimizer_name} is not supported.")
+
+        return optimizer
+
+
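For orientation, here is a minimal sketch of how these new `Train_Steps` methods might be driven together; `steps`, `base_model_fn`, `initial_state_dict`, and the dataset objects are hypothetical placeholders, not names from the package, and the `itertools.product` expansion is an assumption about how the `keys`/`values` grid is consumed.

import itertools

# Hypothetical driver around a Train_Steps instance `steps`.
keys, values, folder_path = steps.hyperparas_and_path(
    model_name, data_name, optimizer_name, Paras
)

for combo in itertools.product(*values):  # one run per grid point
    hyperparams = dict(zip(keys, combo))
    model, train_loader, test_loader = steps.reloading_model_dataloader(
        base_model_fn, initial_state_dict, data_name,
        train_dataset, test_dataset, Paras,
    )
    optimizer = steps.chosen_optimizer(optimizer_name, model, hyperparams, Paras)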
junshan_kit/Print_Info.py
CHANGED
@@ -9,17 +9,17 @@ def training_group(training_group):
     print(f"-------------------------------------------------------")
 
 
-def training_info(use_color, data_name, optimizer_name, folder_path, hyperparams, Paras, model_name):
+def training_info(args, use_color, data_name, optimizer_name, folder_path, hyperparams, Paras, model_name):
     if use_color:
         print("\033[90m" + "-" * 115 + "\033[0m")
         print(
-            f"\033[32m✅ \033[34mDataset:\033[32m {data_name}, \t\033[34mBatch-size:\033[32m {
+            f"\033[32m✅ \033[34mDataset:\033[32m {data_name}, \t\033[34mBatch-size:\033[32m {args.bs}, \t\033[34m(training, test) = \033[32m ({Paras['train_data_num']}, {Paras['test_data_num']}), \t\033[34m device:\033[32m {Paras['device']}"
         )
         print(
             f"\033[32m✅ \033[34mOptimizer:\033[32m {optimizer_name}, \t\033[34mParams:\033[32m {hyperparams}"
         )
         print(
-            f'\033[32m✅ \033[34mmodel:\033[32m {model_name}, \t\033[34mmodel type:\033[32m {Paras["model_type"][model_name]},\t\033[34m loss_fn:\033[32m {Paras["loss_fn"]}
+            f'\033[32m✅ \033[34mmodel:\033[32m {model_name}, \t\033[34mmodel type:\033[32m {Paras["model_type"][model_name]},\t\033[34m loss_fn:\033[32m {Paras["loss_fn"]}'
         )
         print(f"\033[32m✅ \033[34mfolder_path:\033[32m {folder_path}")
         print("\033[90m" + "-" * 115 + "\033[0m")
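`training_info` now takes the parsed CLI arguments as its first parameter (it reads `args.bs`), so callers need updating roughly as follows; `args` is assumed to be the argparse namespace that carries `bs`.

# Sketch of an updated call site:
Print_Info.training_info(args, use_color, data_name, optimizer_name,
                         folder_path, hyperparams, Paras, model_name)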
junshan_kit/SPBM.py
ADDED
@@ -0,0 +1,350 @@
+from junshan_kit import SPBM_func
+import torch, time, os
+from torch.optim.optimizer import Optimizer
+from torch.nn.utils import parameters_to_vector, vector_to_parameters
+
+
+class PF(Optimizer):
+    def __init__(self, params, model, hyperparams, Paras):
+        defaults = dict()
+        super().__init__(params, defaults)
+        self.model = model
+        self.cutting_num = hyperparams['cutting_number']
+        self.M = hyperparams['M']
+        self.delta = hyperparams['delta']
+        self.Paras = Paras
+
+        self.x_his, self.g_his, self.f_his = [], [], []
+
+    def step(self, closure=None):
+        if closure is None:
+            raise RuntimeError("Closure required for CuttingPlaneOptimizer")
+
+        # Reset the gradient and perform forward computation
+        loss = closure()
+
+        with torch.no_grad():
+            xk = parameters_to_vector(self.model.parameters())
+            # print(torch.norm(xk))
+            g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
+
+            # Add cutting plane
+            x_his, f_his, g_his = SPBM_func.add_cutting(self.x_his, self.f_his, self.g_his, xk.detach().clone(), g_k.detach().clone(), loss.detach().clone(), self.cutting_num)
+
+            # Cut selection
+            selected_x, selected_f, selected_g = SPBM_func.cut_selection(x_his, f_his, g_his, self.M)
+
+            # Coefficients of the dual problem
+            Gk, rk, ek = SPBM_func.get_var(selected_x, selected_f, selected_g, self.delta)
+
+            # Solve the dual subproblem
+            xk = SPBM_func.subproblem_pf(Gk, ek, xk, self.delta, self.Paras)
+
+            # print(len(self.f_his))
+            vector_to_parameters(xk, self.model.parameters())
+
+
+        # For now, return the loss (a tensor)
+        return loss
+
+# <SPBM-TR>
+class TR(Optimizer):
+    def __init__(self, params, model, hyperparams, Paras):
+        defaults = dict()
+        super().__init__(params, defaults)
+        self.model = model
+        self.cutting_num = hyperparams['cutting_number']
+        self.M = hyperparams['M']
+        self.delta = hyperparams['delta']
+        self.Paras = Paras
+
+        self.x_his, self.g_his, self.f_his = [], [], []
+
+    def step(self, closure=None):
+        if closure is None:
+            raise RuntimeError("Closure required for CuttingPlaneOptimizer")
+
+        # Reset the gradient and perform forward computation
+        loss = closure()
+
+        with torch.no_grad():
+            xk = parameters_to_vector(self.model.parameters())
+            # print(torch.norm(xk))
+            g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
+
+            # Add cutting plane
+            x_his, f_his, g_his = SPBM_func.add_cutting(self.x_his, self.f_his, self.g_his, xk.detach().clone(), g_k.detach().clone(), loss.detach().clone(), self.cutting_num)
+
+            # Cut selection
+            selected_x, selected_f, selected_g = SPBM_func.cut_selection(x_his, f_his, g_his, self.M)
+
+            # Coefficients of the dual problem
+            Gk, rk, ek = SPBM_func.get_var(selected_x, selected_f, selected_g, self.delta)
+
+            # Solve the dual subproblem
+            xk = SPBM_func.subproblem_tr_2(Gk, ek, xk, rk, self.Paras)
+
+            # print(len(self.f_his))
+            vector_to_parameters(xk, self.model.parameters())
+
+        # Return the loss (a tensor)
+        return loss
+# <SPBM-TR>
+
+# <SPBM-TR_NoneSpecial>
+class TR_NoneSpecial(Optimizer):
+    def __init__(self, params, model, hyperparams, Paras):
+        defaults = dict()
+        super().__init__(params, defaults)
+        self.model = model
+        self.cutting_num = hyperparams['cutting_number']
+        self.M = hyperparams['M']
+        self.delta = hyperparams['delta']
+        self.Paras = Paras
+
+        self.x_his, self.g_his, self.f_his = [], [], []
+
+    def step(self, closure=None):
+        if closure is None:
+            raise RuntimeError("Closure required for CuttingPlaneOptimizer")
+
+        # Reset the gradient and perform forward computation
+        loss = closure()
+
+        with torch.no_grad():
+            xk = parameters_to_vector(self.model.parameters())
+            # print(torch.norm(xk))
+            g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
+
+            # Add cutting plane
+            x_his, f_his, g_his = SPBM_func.add_cutting(self.x_his, self.f_his, self.g_his, xk.detach().clone(), g_k.detach().clone(), loss.detach().clone(), self.cutting_num)
+
+            # Cut selection
+            selected_x, selected_f, selected_g = SPBM_func.cut_selection(x_his, f_his, g_his, self.M)
+
+            # Coefficients of the dual problem
+            Gk, rk, ek = SPBM_func.get_var(selected_x, selected_f, selected_g, self.delta)
+
+            # Solve the dual subproblem
+            xk = SPBM_func.subproblem_tr_NoneSpecial(Gk, ek, xk, rk, self.Paras)
+
+            # print(len(self.f_his))
+            vector_to_parameters(xk, self.model.parameters())
+
+        # Return the loss (a tensor)
+        return loss
+# <SPBM-TR_NoneSpecial>
+
+class TR_primal(Optimizer):
+    def __init__(self, params, model, hyperparams, Paras):
+        defaults = dict()
+        super().__init__(params, defaults)
+        self.model = model
+        self.cutting_num = hyperparams['cutting_number']
+        self.M = hyperparams['M']
+        self.delta = hyperparams['delta']
+        self.Paras = Paras
+
+        self.x_his, self.g_his, self.f_his = [], [], []
+
+    def step(self, closure=None):
+        if closure is None:
+            raise RuntimeError("Closure required for CuttingPlaneOptimizer")
+
+        # Reset the gradient and perform forward computation
+        loss = closure()
+
+        with torch.no_grad():
+            xk = parameters_to_vector(self.model.parameters())
+            # print(torch.norm(xk))
+            g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
+
+            # Add cutting plane
+            x_his, f_his, g_his = SPBM_func.add_cutting(self.x_his, self.f_his, self.g_his, xk.detach().clone(), g_k.detach().clone(), loss.detach().clone(), self.cutting_num)
+
+            # Cut selection
+            selected_x, selected_f, selected_g = SPBM_func.cut_selection(x_his, f_his, g_his, self.M)
+
+            # Coefficients of the dual problem
+            Gk, rk, ek = SPBM_func.get_var(selected_x, selected_f, selected_g, self.delta)
+
+            # Solve the dual subproblem
+            xk = SPBM_func.subproblem_tr_primal(Gk, ek, xk, rk, self.Paras)
+
+            # print(len(self.f_his))
+            vector_to_parameters(xk, self.model.parameters())
+
+        # Return the loss (a tensor)
+        return loss
+
+
+class TR_NoneLower(Optimizer):
+    def __init__(self, params, model, hyperparams, Paras):
+        defaults = dict()
+        super().__init__(params, defaults)
+        self.model = model
+        self.cutting_num = hyperparams['cutting_number']
+        self.M = hyperparams['M']
+        self.delta = hyperparams['delta']
+        self.Paras = Paras
+
+        self.x_his, self.g_his, self.f_his = [], [], []
+
+    def step(self, closure=None):
+        if closure is None:
+            raise RuntimeError("Closure required for CuttingPlaneOptimizer")
+
+        # Reset the gradient and perform forward computation
+        loss = closure()
+
+        with torch.no_grad():
+            xk = parameters_to_vector(self.model.parameters())
+            # print(torch.norm(xk))
+            g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
+
+            # Add cutting plane
+            x_his, f_his, g_his = SPBM_func.add_cutting(self.x_his, self.f_his, self.g_his, xk.detach().clone(), g_k.detach().clone(), loss.detach().clone(), self.cutting_num)
+
+            # Cut selection
+            selected_x, selected_f, selected_g = SPBM_func.cut_selection(x_his, f_his, g_his, self.M)
+
+            # Coefficients of the dual problem
+            Gk, rk, ek = SPBM_func.get_var(selected_x, selected_f, selected_g, self.delta)
+
+            # Solve the dual subproblem
+            xk = SPBM_func.subproblem_tr_NoneLower(Gk, ek, xk, rk, self.Paras)
+
+            # print(len(self.f_his))
+            vector_to_parameters(xk, self.model.parameters())
+
+        # Return the loss (a tensor)
+        return loss
+
+class TR_NoneCut(Optimizer):
+    def __init__(self, params, model, hyperparams, Paras):
+        defaults = dict()
+        super().__init__(params, defaults)
+        self.model = model
+        self.cutting_num = hyperparams['cutting_number']
+        self.M = hyperparams['M']
+        self.delta = hyperparams['delta']
+        self.Paras = Paras
+
+        self.x_his, self.g_his, self.f_his = [], [], []
+
+    def step(self, closure=None):
+        if closure is None:
+            raise RuntimeError("Closure required for CuttingPlaneOptimizer")
+
+        # Reset the gradient and perform forward computation
+        loss = closure()
+
+        with torch.no_grad():
+            xk = parameters_to_vector(self.model.parameters())
+            # print(torch.norm(xk))
+            g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
+
+            # Add cutting plane
+            x_his, f_his, g_his = SPBM_func.add_cutting(self.x_his, self.f_his, self.g_his, xk.detach().clone(), g_k.detach().clone(), loss.detach().clone(), self.cutting_num)
+
+            # # Cut selection (disabled in this NoneCut variant)
+            # selected_x, selected_f, selected_g = SPBM_func.cut_selection(x_his, f_his, g_his, self.M)
+
+            # Coefficients of the dual problem
+            Gk, rk, ek = SPBM_func.get_var(x_his, f_his, g_his, self.delta)
+
+            # Solve the dual subproblem
+            xk = SPBM_func.subproblem_tr_NoneLower(Gk, ek, xk, rk, self.Paras)
+
+            # print(len(self.f_his))
+            vector_to_parameters(xk, self.model.parameters())
+
+        # Return the loss (a tensor)
+        return loss
+
+# ************************** SPBM-PF **************************
+class PF_NoneLower(Optimizer):
+    def __init__(self, params, model, hyperparams, Paras):
+        defaults = dict()
+        super().__init__(params, defaults)
+        self.model = model
+        self.cutting_num = hyperparams['cutting_number']
+        self.M = hyperparams['M']
+        self.delta = hyperparams['delta']
+        self.Paras = Paras
+
+        self.x_his, self.g_his, self.f_his = [], [], []
+
+    def step(self, closure=None):
+        if closure is None:
+            raise RuntimeError("Closure required for CuttingPlaneOptimizer")
+
+        # Reset the gradient and perform forward computation
+        loss = closure()
+
+        with torch.no_grad():
+            xk = parameters_to_vector(self.model.parameters())
+            # print(torch.norm(xk))
+            g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
+
+            # Add cutting plane
+            x_his, f_his, g_his = SPBM_func.add_cutting(self.x_his, self.f_his, self.g_his, xk.detach().clone(), g_k.detach().clone(), loss.detach().clone(), self.cutting_num)
+
+            # Cut selection
+            selected_x, selected_f, selected_g = SPBM_func.cut_selection(x_his, f_his, g_his, self.M)
+
+            # Coefficients of the dual problem
+            Gk, rk, ek = SPBM_func.get_var(selected_x, selected_f, selected_g, self.delta)
+
+            # Solve the dual subproblem
+            xk = SPBM_func.subproblem_pf_NoneLower(Gk, ek, xk, self.delta, self.Paras)
+
+            # print(len(self.f_his))
+            vector_to_parameters(xk, self.model.parameters())
+
+        # Return the loss (a tensor)
+        return loss
+
+
+class PF_NoneCut(Optimizer):
+    def __init__(self, params, model, hyperparams, Paras):
+        defaults = dict()
+        super().__init__(params, defaults)
+        self.model = model
+        self.cutting_num = hyperparams['cutting_number']
+        self.M = hyperparams['M']
+        self.delta = hyperparams['delta']
+        self.Paras = Paras
+
+        self.x_his, self.g_his, self.f_his = [], [], []
+
+    def step(self, closure=None):
+        if closure is None:
+            raise RuntimeError("Closure required for CuttingPlaneOptimizer")
+
+        # Reset the gradient and perform forward computation
+        loss = closure()
+
+        with torch.no_grad():
+            xk = parameters_to_vector(self.model.parameters())
+            # print(torch.norm(xk))
+            g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
+
+            # Add cutting plane
+            x_his, f_his, g_his = SPBM_func.add_cutting(self.x_his, self.f_his, self.g_his, xk.detach().clone(), g_k.detach().clone(), loss.detach().clone(), self.cutting_num)
+
+            # # Cut selection (disabled in this NoneCut variant)
+            # selected_x, selected_f, selected_g = SPBM_func.cut_selection(x_his, f_his, g_his, self.M)
+
+            # Coefficients of the dual problem
+            Gk, rk, ek = SPBM_func.get_var(x_his, f_his, g_his, self.delta)
+
+            # Solve the dual subproblem
+            xk = SPBM_func.subproblem_pf_NoneLower(Gk, ek, xk, self.delta, self.Paras)
+
+            # print(len(self.f_his))
+            vector_to_parameters(xk, self.model.parameters())
+
+        # Return the loss (a tensor)
+        return loss
+
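Every SPBM optimizer above requires a closure: `step()` raises a RuntimeError without one, and it reads `p.grad` immediately after `closure()` returns, so the closure must zero stale gradients and call `backward()`. A minimal training-step sketch, assuming `model`, `loss_fn`, `train_loader`, `hyperparams`, and `Paras` are set up as in ExperimentHub above:

from junshan_kit import SPBM

optimizer = SPBM.TR(model.parameters(), model, hyperparams, Paras)

for x, y in train_loader:
    def closure():
        # Zero stale gradients, recompute the loss, and populate p.grad
        # before step() snapshots the parameter and gradient vectors.
        optimizer.zero_grad()
        loss = loss_fn(model(x), y)
        loss.backward()
        return loss

    loss = optimizer.step(closure)  # returns the closure's loss tensor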