junshan-kit 2.3.9__py2.py3-none-any.whl → 2.4.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- junshan_kit/DataSets.py +1 -1
- junshan_kit/ExperimentHub.py +194 -0
- junshan_kit/Models.py +207 -0
- junshan_kit/Optimizers.py +85 -0
- junshan_kit/Print_Info.py +59 -0
- junshan_kit/TrainingParas.py +470 -0
- junshan_kit/check_args.py +116 -0
- junshan_kit/datahub.py +281 -0
- junshan_kit/kit.py +7 -1
- {junshan_kit-2.3.9.dist-info → junshan_kit-2.4.0.dist-info}/METADATA +1 -2
- junshan_kit-2.4.0.dist-info/RECORD +14 -0
- junshan_kit-2.3.9.dist-info/RECORD +0 -7
- {junshan_kit-2.3.9.dist-info → junshan_kit-2.4.0.dist-info}/WHEEL +0 -0
junshan_kit/DataSets.py
CHANGED
@@ -168,7 +168,7 @@ def adult_income_prediction(data_name = "Adult Income Prediction", print_info =
 
 def TamilNadu_weather_2020_2025(data_name = "TN Weather 2020-2025", print_info = False, export_csv = False):
     csv_path = f'./exp_data/{data_name}/TNweather_1.8M.csv'
-    drop_cols = []
+    drop_cols = ['Unnamed: 0']
     label_col = 'rain_tomorrow'
     label_map = {0: -1, 1: 1}
 
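The only change in DataSets.py is that the Tamil Nadu weather loader now drops the 'Unnamed: 0' column. That column is the unnamed index pandas writes when a DataFrame is saved with to_csv() and the index is kept, so it carries no signal and would otherwise leak a row counter into the features. A minimal sketch of the effect is below; the CSV path, column names, and label map come from the diff, while the pandas-based loading shown here is only an assumed illustration, not the package's actual implementation.

# Illustration only (assumed loader, not junshan_kit's code)
import pandas as pd

df = pd.read_csv("./exp_data/TN Weather 2020-2025/TNweather_1.8M.csv")

drop_cols = ['Unnamed: 0']                       # new in 2.4.0: remove the stray index column
df = df.drop(columns=drop_cols, errors="ignore")

label_col = 'rain_tomorrow'
label_map = {0: -1, 1: 1}
y = df[label_col].map(label_map)                 # labels remapped to {-1, +1}
X = df.drop(columns=[label_col])                 # remaining columns are features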
junshan_kit/ExperimentHub.py
ADDED
@@ -0,0 +1,194 @@
+import sys, os, torch, random
+import numpy as np
+import torch.nn as nn
+from torch.utils.data import Subset
+
+# # Get the directory of the current script
+# script_dir = os.path.dirname(os.path.abspath(__file__))
+# sys.path.append(os.path.join(script_dir, 'src'))
+
+from junshan_kit import datahub, Models, TrainingParas
+
+# -------------------------------------
+def set_seed(seed=42):
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    np.random.seed(seed)
+    random.seed(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+def device(Paras):
+    device = torch.device(f"{Paras['cuda']}" if torch.cuda.is_available() else "cpu")
+    Paras["device"] = device
+    use_color = sys.stdout.isatty()
+    Paras["use_color"] = use_color
+
+    return Paras
+
+# -------------------------------------
+class Train_Steps:
+    def __init__(self, args) -> None:
+        self.args = args
+
+    def _model_map(self, model_name):
+        model_mapping = self.args.model_mapping
+
+        return model_mapping[model_name]
+
+    def get_train_group(self):
+        training_group = []
+        for cfg in self.args.train_group:
+            model, dataset, optimizer = cfg.split("-")
+            training_group.append((self._model_map(model), dataset, optimizer))
+
+        return training_group
+
+    def set_paras(self, results_folder_name, py_name, time_str, OtherParas):
+        Paras = {
+            # Name of the folder where results will be saved.
+            "results_folder_name": results_folder_name,
+            # Whether to draw loss/accuracy figures.
+            "DrawFigs": "ON",
+            # Whether to use log scale when drawing plots.
+            "use_log_scale": "ON",
+            # Print loss every N epochs.
+            "epoch_log_interval": 1,
+            # Timestamp string for result saving.
+            "time_str": time_str,
+            # Random seed
+            "seed": OtherParas['seed'],
+            # Device used for training.
+            "cuda": f"cuda:{self.args.cuda}",
+
+            # batch-size
+            "batch_size": self.args.bs,
+
+            # epochs
+            "epochs": self.args.e,
+
+            # split_train_data
+            "split_train_data": self.args.s,
+
+            # select_subset
+            "select_subset": self.args.subset,
+
+            # subset_number_dict
+            "subset_number_dict": TrainingParas.subset_number_dict(OtherParas),
+
+            # validation
+            "validation": TrainingParas.validation(),
+
+            # validation_rate
+            "validation_rate": TrainingParas.validation_rate(),
+
+            # model list
+            "model_list" : TrainingParas.model_list(),
+
+            # model_type
+            "model_type": TrainingParas.model_type(),
+
+            # data_list
+            "data_list": TrainingParas.data_list(),
+
+            # optimizer_dict
+            "optimizer_dict": TrainingParas.optimizer_dict(OtherParas)
+        }
+        Paras["py_name"] = py_name
+
+        return Paras
+
+    # <Step_3> : Chosen_loss
+    def chosen_loss(self, model_name, Paras):
+        # ---------------------------------------------------
+        # There is an additional parameter
+        if model_name == "LogRegressionBinaryL2":
+            Paras["lambda"] = 1e-3
+        # ---------------------------------------------------
+
+        if model_name in ["LeastSquares"]:
+            loss_fn = nn.MSELoss()
+
+        else:
+            if Paras["model_type"][model_name] == "binary":
+                loss_fn = nn.BCEWithLogitsLoss()
+
+            elif Paras["model_type"][model_name] == "multi":
+                loss_fn = nn.CrossEntropyLoss()
+
+            else:
+                loss_fn = nn.MSELoss()
+                print("\033[91m The loss function is error!\033[0m")
+                assert False
+        Paras["loss_fn"] = loss_fn
+
+        return loss_fn, Paras
+
+    # <Step_4> : import data --> step.py
+    def load_data(self, model_name, data_name, Paras):
+        # load data
+        train_path = f"./exp_data/{data_name}/training_data"
+        test_path = f"./exp_data/{data_name}/test_data"
+        # Paras["train_ratio"] = 1.0
+        # Paras["select_subset"].setdefault(data_name, False)
+        # Paras["validation"].setdefault(data_name, False)
+
+        if data_name == "MNIST":
+            train_dataset, test_dataset, transform = datahub.MNIST(Paras, model_name)
+
+        elif data_name == "CIFAR100":
+            train_dataset, test_dataset, transform = datahub.CIFAR100(Paras, model_name)
+
+        elif data_name == "CALTECH101_Resize_32":
+            Paras["train_ratio"] = 0.7
+            train_dataset, test_dataset, transform = datahub.caltech101_Resize_32(
+                Paras["seed"], Paras["train_ratio"], split=True
+            )
+
+        elif data_name in ["Vowel", "Letter", "Shuttle", "w8a"]:
+            Paras["train_ratio"] = Paras["split_train_data"][data_name]
+            train_dataset, test_dataset, transform = datahub.get_libsvm_data(
+                train_path + ".txt", test_path + ".txt", data_name
+            )
+
+        elif data_name in ["RCV1", "Duke", "Ijcnn"]:
+            Paras["train_ratio"] = Paras["split_train_data"][data_name]
+            train_dataset, test_dataset, transform = datahub.get_libsvm_bz2_data(
+                train_path + ".bz2", test_path + ".bz2", data_name, Paras
+            )
+
+        else:
+            transform = None
+            print(f"The data_name is error!")
+            assert False
+
+        return train_dataset, test_dataset, transform
+    # <Step_4>
+
+    # <subset> : Step 5.1 -->step.py
+    def set_subset(self, data_name, Paras, train_dataset, test_dataset):
+        if self.args.subset[0]>1:
+            train_num = self.args.subset[0]
+            test_num = self.args.subset[1]
+            train_subset_num = min(train_num, len(train_dataset))
+            test_subset_num = min(test_num, len(test_dataset))
+
+            train_subset_indices = list(range(int(train_subset_num)))
+            train_dataset = Subset(train_dataset, train_subset_indices)
+
+            test_subset_indices = list(range(int(test_subset_num)))
+            test_dataset = Subset(test_dataset, test_subset_indices)
+
+        else:
+            train_ratios= self.args.subset[0]
+            test_ratios= self.args.subset[1]
+
+            train_subset_indices = list(range(int(train_ratios * len(train_dataset))))
+            train_dataset = Subset(train_dataset, train_subset_indices)
+
+            test_subset_indices = list(range(int(test_ratios * len(test_dataset))))
+            test_dataset = Subset(test_dataset, test_subset_indices)
+
+        return train_dataset, test_dataset
+
+
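ExperimentHub.py wires the experiment loop together: set_seed makes cuDNN deterministic, device records the CUDA target and whether stdout supports ANSI colors, and Train_Steps.get_train_group parses each "Model-Dataset-Optimizer" string into a (builder, dataset, optimizer) triple via args.model_mapping. A hedged usage sketch follows; the args object and its field values are invented for illustration (the real ones presumably come from the package's CLI parsing in check_args.py), only the function and class names are taken from the diff.

# Hypothetical driver script (argument values are assumptions)
from types import SimpleNamespace
from junshan_kit import ExperimentHub

args = SimpleNamespace(
    train_group=["ResNet18-CIFAR100-SPSmax"],        # "Model-Dataset-Optimizer"
    model_mapping={"ResNet18": "Build_ResNet18"},    # assumed model-name mapping
    cuda=0, bs={"CIFAR100": 128}, e={"CIFAR100": 10},
    s={}, subset=[1.0, 1.0],
)

ExperimentHub.set_seed(42)                 # seeds torch/numpy/random, deterministic cuDNN
steps = ExperimentHub.Train_Steps(args)
print(steps.get_train_group())             # [('Build_ResNet18', 'CIFAR100', 'SPSmax')]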
junshan_kit/Models.py
ADDED
@@ -0,0 +1,207 @@
+import torchvision,torch, random
+import numpy as np
+from torchvision.models import resnet18,resnet34, ResNet18_Weights, ResNet34_Weights
+import torch.nn as nn
+
+
+# ---------------- Build ResNet18 - Caltech101 -----------------------
+def Build_ResNet18_CALTECH101_Resize_32():
+
+    """
+    1. Modify the first convolutional layer for smaller input (e.g., 32x32 instead of 224x224)
+       Original: kernel_size=7, stride=2, padding=3 → changed to 3x3 kernel, stride=1, padding=1
+
+    2. Adjust the final fully connected layer to match the number of Caltech101 classes (101)
+    """
+    model = resnet18(weights=None)
+    model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) # 1
+    model.fc = nn.Linear(model.fc.in_features, 101) # 2
+
+    return model
+
+
+# ---------------- Build ResNet18 - CIFAR100 -----------------------
+def Build_ResNet18_CIFAR100():
+    """
+    1. Modify the first convolutional layer for smaller input (e.g., 32x32 instead of 224x224)
+       Original: kernel_size=7, stride=2, padding=3 → changed to 3x3 kernel, stride=1, padding=1
+
+    2. Adjust the final fully connected layer to match the number of CIFAR-100 classes (100)
+    """
+
+    model = resnet18(weights=None)
+    # model = resnet18(weights=ResNet18_Weights.DEFAULT)
+    model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) # 1
+    model.fc = nn.Linear(model.fc.in_features, 100) # 2
+
+    return model
+
+
+# ---------------- Build ResNet18 - MNIST ----------------------------
+def Build_ResNet18_MNIST():
+    """
+    1. Modify the first convolutional layer to accept grayscale input (1 channel instead of 3)
+       Original: in_channels=3 → changed to in_channels=1
+
+    2. Adjust the final fully connected layer to match the number of MNIST classes (10)
+    """
+
+    model = resnet18(weights=None)
+    model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False) # 1
+    model.fc = nn.Linear(model.fc.in_features, 10) # 2
+
+    return model
+
+
+# ---------------- Build ResNet34 - CIFAR100 -----------------------
+def Build_ResNet34_CIFAR100():
+
+    model = resnet34(weights=None)
+    model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
+    model.fc = nn.Linear(model.fc.in_features, 100)
+    return model
+
+# ---------------- Build ResNet34 - MNIST ----------------------------
+def Build_ResNet34_MNIST():
+    # Do not load the pre-trained weights
+    model = resnet34(weights=None)
+
+    model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
+    model.fc = nn.Linear(model.fc.in_features, 10)
+
+    return model
+
+# ---------------- Build ResNet34 - Caltech101 -----------------------
+def Build_ResNet34_CALTECH101_Resize_32():
+
+    model = resnet34(weights=None)
+    model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
+    model.fc = nn.Linear(model.fc.in_features, 101)
+    return model
+
+
+#**************************************************************
+# ---------------------- LeastSquares -------------------------
+#**************************************************************
+# ---------------- LeastSquares - MNIST -----------------------
+def Build_LeastSquares_MNIST():
+    """
+    1. flatten MNIST images (1x28x28 → 784)
+    2. Use a linear layer for multi-classification
+    """
+    return nn.Sequential(
+        nn.Flatten(),
+        nn.Linear(28 * 28, 10))
+
+# ---------------- LeastSquares - CIFAR100 --------------------
+def Build_LeastSquares_CIFAR100():
+    """
+    1. flatten CIFAR100 images (3 * 32 * 32 → 3072)
+    2. Use a linear layer for multi-classification
+    """
+    return nn.Sequential(
+        nn.Flatten(),
+        nn.Linear(3 * 32 * 32, 100))
+
+# ---------------- LeastSquares - Caltech101 ------------------
+def Build_LeastSquares_CALTECH101_Resize_32():
+    return nn.Sequential(
+        nn.Flatten(),
+        nn.Linear(3*32*32, 101)
+    )
+
+
+#*************************************************************
+# --------------- LogRegressionBinary ------------------------
+#*************************************************************
+# -------------- LogRegressionBinary - MNIST ------------------
+def Build_LogRegressionBinary_MNIST():
+    """
+    1. flatten MNIST images (1x28x28 → 784)
+    2. Use a linear layer for binary classification
+    """
+    return nn.Sequential(
+        nn.Flatten(),
+        nn.Linear(28 * 28, 1))
+
+
+# --------------- LogRegressionBinary - CIFAR100 --------------
+def Build_LogRegressionBinary_CIFAR100():
+    """
+    1. flatten CIFAR100 images
+    2. Use a linear layer for binary classification
+    """
+    return nn.Sequential(
+        nn.Flatten(),
+        nn.Linear(3* 32 * 32, 1))
+
+# -------------- LogRegressionBinary - RCV1 ------------------
+def Build_LogRegressionBinary_RCV1():
+    """
+    1. Use a linear layer for binary classification
+    """
+    return nn.Sequential(
+        nn.Linear(47236, 1))
+
+# <LogRegressionBinaryL2>
+#**************************************************************
+# ------------- LogRegressionBinaryL2 -------------------------
+#**************************************************************
+def Build_LogRegressionBinaryL2_RCV1():
+    """
+    1. Use a linear layer for binary classification
+    """
+    return nn.Sequential(
+        nn.Linear(47236, 1))
+# <LogRegressionBinaryL2>
+
+# ---------------------------------------------------------
+def Build_LogRegressionBinaryL2_MNIST():
+    """
+    1. flatten MNIST images (1x28x28 -> 784)
+    2. Use a linear layer for binary classification
+    """
+    return nn.Sequential(
+        nn.Flatten(),
+        nn.Linear(28 * 28, 1))
+
+# ---------------------------------------------------------
+def Build_LogRegressionBinaryL2_CIFAR100():
+    """
+    1. flatten CIFAR100 images
+    2. Use a linear layer for binary classification
+    """
+    return nn.Sequential(
+        nn.Flatten(),
+        nn.Linear(3* 32 * 32, 1))
+
+# ---------------------------------------------------------
+def Build_LogRegressionBinaryL2_Duke():
+    """
+    Use a linear layer for binary classification
+    """
+    return nn.Sequential(
+        nn.Flatten(),
+        nn.Linear(7129, 1))
+
+# ---------------------------------------------------------
+def Build_LogRegressionBinaryL2_Ijcnn():
+    """
+    Use a linear layer for binary classification
+    """
+    return nn.Sequential(
+        nn.Flatten(),
+        nn.Linear(22, 1))
+
+# ---------------------------------------------------------
+def Build_LogRegressionBinaryL2_w8a():
+    """
+    Use a linear layer for binary classification
+    """
+    return nn.Sequential(
+        nn.Flatten(),
+        nn.Linear(300, 1))
+
+
+
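Every builder in Models.py returns a plain torch.nn.Module, so a quick shape check shows what the 32x32 variants change: the ResNet stems are swapped for a 3x3, stride-1 convolution so small images are not downsampled away, and the linear models emit one logit per class (or a single logit in the binary cases). The sketch below is illustrative only; the builder names and input dimensions come from the diff, the batch sizes are arbitrary.

# Illustrative shape check (not part of the package)
import torch
from junshan_kit import Models

model = Models.Build_ResNet18_CIFAR100()
x = torch.randn(4, 3, 32, 32)                 # dummy CIFAR-100 batch
print(model(x).shape)                         # torch.Size([4, 100])

clf = Models.Build_LogRegressionBinaryL2_w8a()
print(clf(torch.randn(4, 300)).shape)         # torch.Size([4, 1]) -> one logit per sample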
junshan_kit/Optimizers.py
ADDED
@@ -0,0 +1,85 @@
+import torch, os, time
+from torch.optim.optimizer import Optimizer
+from torch.nn.utils import parameters_to_vector, vector_to_parameters
+
+class SPSmax(Optimizer):
+    def __init__(self, params, model, hyperparams, Paras):
+        defaults = dict()
+        super().__init__(params, defaults)
+        self.model = model
+        self.c = hyperparams['c']
+        self.gamma = hyperparams['gamma']
+        if 'f_star' not in Paras or Paras['f_star'] is None:
+            self.f_star = 0
+        else:
+            self.f_star = Paras['f_star']
+        self.step_size = []
+
+    def step(self, closure=None):
+        if closure is None:
+            raise RuntimeError("Closure required for SPSmax")
+
+        # Reset the gradient and perform forward computation
+        loss = closure()
+
+        with torch.no_grad():
+            xk = parameters_to_vector(self.model.parameters())
+            # print(torch.norm(xk))
+            g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
+
+            # Step-size
+            step_size = (loss - self.f_star) / ((self.c * torch.norm(g_k, p=2) ** 2) + 1e-8)
+            step_size = min(step_size, self.gamma)
+            self.step_size.append(step_size)
+
+            # Update
+            xk = xk - step_size * g_k
+
+            # print(len(self.f_his))
+            vector_to_parameters(xk, self.model.parameters())
+
+        # Temporarily return loss (tensor type)
+        return loss
+
+
+class ALR_SMAG(Optimizer):
+    def __init__(self, params, model, hyperparams, Paras):
+        defaults = dict()
+        super().__init__(params, defaults)
+        self.model = model
+        self.c = hyperparams['c']
+        self.eta_max = hyperparams['eta_max']
+        self.beta = hyperparams['beta']
+        if 'f_star' not in Paras or Paras['f_star'] is None:
+            self.f_star = 0
+        else:
+            self.f_star = Paras['f_star']
+        self.step_size = []
+        self.d_k = torch.zeros_like(parameters_to_vector(self.model.parameters()))
+
+    def step(self, closure=None):
+        if closure is None:
+            raise RuntimeError("Closure required for SPSmax")
+
+        # Reset the gradient and perform forward computation
+        loss = closure()
+
+        with torch.no_grad():
+            xk = parameters_to_vector(self.model.parameters())
+            # print(torch.norm(xk))
+            g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
+
+            self.d_k = self.beta * self.d_k + g_k
+            # Step-size
+            step_size = (loss - self.f_star) / ((self.c * torch.norm(self.d_k, p=2) ** 2) + 1e-8)
+            step_size = min(step_size, self.eta_max)
+            self.step_size.append(step_size)
+
+            # Update
+            xk = xk - step_size * g_k
+
+            # print(len(self.f_his))
+            vector_to_parameters(xk, self.model.parameters())
+
+        # Temporarily return loss (tensor type)
+        return loss
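Both optimizers are closure-driven: step() calls the closure to recompute gradients, flattens parameters and gradients into vectors, and applies a Polyak-style step size, step_size = min((loss - f_star) / (c * ||g||^2), gamma) for SPSmax (ALR_SMAG uses the momentum buffer d_k and the cap eta_max instead). A hedged training-loop sketch follows; the toy model, data, and hyperparameter values are assumptions made up for illustration, only the constructor signature (params, model, hyperparams, Paras) and the closure requirement come from the diff.

# Illustrative closure-based loop (toy model and values are assumptions)
import torch
import torch.nn as nn
from junshan_kit import Optimizers

model = nn.Linear(10, 1)
loss_fn = nn.BCEWithLogitsLoss()
X, y = torch.randn(64, 10), torch.randint(0, 2, (64, 1)).float()

opt = Optimizers.SPSmax(model.parameters(), model,
                        hyperparams={'c': 0.5, 'gamma': 2.0},
                        Paras={'f_star': 0.0})

def closure():
    # zero the gradients, run the forward pass, backpropagate,
    # and return the loss used in the Polyak step size
    opt.zero_grad()
    loss = loss_fn(model(X), y)
    loss.backward()
    return loss

for _ in range(5):
    print(float(opt.step(closure)))           # loss should trend downward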
junshan_kit/Print_Info.py
ADDED
@@ -0,0 +1,59 @@
+
+
+
+# -------------------------------------------------------------
+def training_group(training_group):
+    print(f"--------------------- training_group ------------------")
+    for g in training_group:
+        print(g)
+    print(f"-------------------------------------------------------")
+
+
+def training_info(use_color, data_name, optimizer_name, folder_path, hyperparams, Paras, model_name):
+    if use_color:
+        print("\033[90m" + "-" * 115 + "\033[0m")
+        print(
+            f"\033[32m✅ \033[34mDataset:\033[32m {data_name}, \t\033[34mBatch-size:\033[32m {Paras['batch_size'][data_name]}, \t\033[34m(training, val, test) = \033[32m ({Paras['training_samples']}, {Paras['val_samples']}, {Paras['test_samples']}), \t\033[34m device:\033[32m {Paras['device']}"
+        )
+        print(
+            f"\033[32m✅ \033[34mOptimizer:\033[32m {optimizer_name}, \t\033[34mParams:\033[32m {hyperparams}"
+        )
+        print(
+            f'\033[32m✅ \033[34mmodel:\033[32m {model_name}, \t\033[34mmodel type:\033[32m {Paras["model_type"][model_name]},\t\033[34m loss_fn:\033[32m {Paras["loss_fn"]},\t\033[34m Subset:\033[32m {Paras["select_subset"][data_name]}'
+        )
+        print(f"\033[32m✅ \033[34mfolder_path:\033[32m {folder_path}")
+        print("\033[90m" + "-" * 115 + "\033[0m")
+
+    else:
+        print("-" * 115)
+        print(
+            f"✅ Dataset: {data_name}, \tBatch-size: {Paras['batch_size'][data_name]}, \t(training, val, test) = ({Paras['training_samples']}, {Paras['val_samples']}, {Paras['test_samples']}), \tdevice: {Paras['device']}"
+        )
+        print(f"✅ Optimizer: {optimizer_name}, \tParams: {hyperparams}")
+        print(
+            f'✅ model: {model_name}, \t model type: {Paras["model_type"][model_name]}, loss_fn: {Paras["loss_fn"]}'
+        )
+        print(f"✅ folder_path: {folder_path}")
+        print("-" * 115)
+
+# <Step_7_2>
+
+def print_per_epoch_info(use_color, epoch, Paras, epoch_loss, training_loss, training_acc, test_loss, test_acc, run_time):
+    epochs = Paras["epochs"][Paras["data_name"]]
+    # result = [(k, f"{v:.4f}") for k, v in run_time.items()]
+    if use_color:
+        print(
+            f'\033[34m epoch = \033[32m{epoch+1}/{epochs}\033[0m,\t\b'
+            f'\033[34m epoch_loss = \033[32m{epoch_loss[epoch+1]:.4e}\033[0m,\t\b'
+            f'\033[34m train_loss = \033[32m{training_loss[epoch+1]:.4e}\033[0m,\t\b'
+            f'\033[34m train_acc = \033[32m{100 * training_acc[epoch+1]:.2f}%\033[0m,\t\b'
+            f'\033[34m test_acc = \033[32m{100 * test_acc[epoch+1]:.2f}%\033[0m,\t\b'
+            f'\033[34m time (ep, tr, te) = \033[32m({run_time["epoch"]:.2f}, {run_time["train"]:.2f}, {run_time["test"]:.2f})\033[0m')
+    else:
+        print(
+            f'epoch = {epoch+1}/{epochs},\t'
+            f'epoch_loss = {epoch_loss[epoch+1]:.4e},\t'
+            f'train_loss = {training_loss[epoch+1]:.4e},\t'
+            f'train_acc = {100 * training_acc[epoch+1]:.2f}%,\t'
+            f'test_acc = {100 * test_acc[epoch+1]:.2f}%,\t'
+            f'time (ep, tr, te) = ({run_time["epoch"]:.2f}, {run_time["train"]:.2f}, {run_time["test"]:.2f})')