mct-nightly 2.1.0.20240725.446__py3-none-any.whl → 2.1.0.20240727.431__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.1.0.20240725.446.dist-info → mct_nightly-2.1.0.20240727.431.dist-info}/METADATA +1 -1
- {mct_nightly-2.1.0.20240725.446.dist-info → mct_nightly-2.1.0.20240727.431.dist-info}/RECORD +35 -31
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/pytorch/constants.py +6 -1
- model_compression_toolkit/core/pytorch/utils.py +27 -0
- model_compression_toolkit/data_generation/common/data_generation.py +20 -18
- model_compression_toolkit/data_generation/common/data_generation_config.py +8 -11
- model_compression_toolkit/data_generation/common/enums.py +24 -12
- model_compression_toolkit/data_generation/common/image_pipeline.py +50 -12
- model_compression_toolkit/data_generation/common/model_info_exctractors.py +0 -8
- model_compression_toolkit/data_generation/common/optimization_utils.py +7 -11
- model_compression_toolkit/data_generation/keras/constants.py +5 -2
- model_compression_toolkit/data_generation/keras/image_operations.py +189 -0
- model_compression_toolkit/data_generation/keras/image_pipeline.py +50 -104
- model_compression_toolkit/data_generation/keras/keras_data_generation.py +28 -36
- model_compression_toolkit/data_generation/keras/model_info_exctractors.py +0 -13
- model_compression_toolkit/data_generation/keras/optimization_functions/bn_layer_weighting_functions.py +16 -6
- model_compression_toolkit/data_generation/keras/optimization_functions/lr_scheduler.py +219 -0
- model_compression_toolkit/data_generation/keras/optimization_functions/output_loss_functions.py +39 -13
- model_compression_toolkit/data_generation/keras/optimization_functions/scheduler_step_functions.py +6 -98
- model_compression_toolkit/data_generation/keras/optimization_utils.py +15 -28
- model_compression_toolkit/data_generation/pytorch/constants.py +4 -1
- model_compression_toolkit/data_generation/pytorch/image_operations.py +105 -0
- model_compression_toolkit/data_generation/pytorch/image_pipeline.py +70 -78
- model_compression_toolkit/data_generation/pytorch/model_info_exctractors.py +0 -10
- model_compression_toolkit/data_generation/pytorch/optimization_functions/bn_layer_weighting_functions.py +17 -6
- model_compression_toolkit/data_generation/pytorch/optimization_functions/image_initilization.py +2 -2
- model_compression_toolkit/data_generation/pytorch/optimization_functions/lr_scheduler.py +219 -0
- model_compression_toolkit/data_generation/pytorch/optimization_functions/output_loss_functions.py +55 -21
- model_compression_toolkit/data_generation/pytorch/optimization_functions/scheduler_step_functions.py +15 -0
- model_compression_toolkit/data_generation/pytorch/optimization_utils.py +32 -54
- model_compression_toolkit/data_generation/pytorch/pytorch_data_generation.py +57 -52
- {mct_nightly-2.1.0.20240725.446.dist-info → mct_nightly-2.1.0.20240727.431.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.1.0.20240725.446.dist-info → mct_nightly-2.1.0.20240727.431.dist-info}/WHEEL +0 -0
- {mct_nightly-2.1.0.20240725.446.dist-info → mct_nightly-2.1.0.20240727.431.dist-info}/top_level.txt +0 -0
model_compression_toolkit/data_generation/keras/optimization_functions/lr_scheduler.py ADDED
@@ -0,0 +1,219 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from typing import Dict, Union
+
+import numpy as np
+import tensorflow as tf
+
+from model_compression_toolkit.logger import Logger
+
+
+class ReduceLROnPlateau(tf.keras.callbacks.Callback):
+    """
+    Reduce learning rate when a metric has stopped improving.
+    """
+
+    def __init__(self, optimizer: tf.keras.optimizers.Optimizer, mode: str = 'min', factor: float = 0.1,
+                 patience: int = 10, threshold: float = 1e-4, threshold_mode: str = 'rel', cooldown: int = 0,
+                 min_lr: float = 0, eps: float = 1e-8, verbose: bool = False):
+        """
+        Initialize the ReduceLROnPlateau scheduler.
+
+        Args:
+            optimizer (tf.keras.optimizers.Optimizer): Wrapped optimizer.
+            mode (str): One of `min`, `max`. In `min` mode, lr will be reduced when the quantity
+                monitored has stopped decreasing; in `max` mode it will be reduced when the
+                quantity monitored has stopped increasing. Default: 'min'.
+            factor (float): Factor by which the learning rate will be reduced. new_lr = lr * factor.
+                Default: 0.1.
+            patience (int): Number of epochs with no improvement after which learning rate will be reduced.
+                Default: 10.
+            threshold (float): Threshold for measuring the new optimum, to only focus on significant changes.
+                Default: 1e-4.
+            threshold_mode (str): One of `rel`, `abs`. In `rel` mode, dynamic_threshold = best * ( 1 + threshold )
+                in 'max' mode or best * ( 1 - threshold ) in `min` mode. In `abs` mode, dynamic_threshold
+                = best + threshold in `max` mode or best - threshold in `min` mode. Default: 'rel'.
+            cooldown (int): Number of epochs to wait before resuming normal operation after lr has been reduced.
+                Default: 0.
+            min_lr (float): A lower bound on the learning rate. Default: 0.
+            eps (float): Minimal decay applied to lr. If the difference between new and old lr is smaller than eps,
+                the update is ignored. Default: 1e-8.
+            verbose (bool): If True, prints a message to stdout for each update. Default: False.
+        """
+
+        super(ReduceLROnPlateau, self).__init__()
+
+        if factor >= 1.0:
+            Logger.critical('Factor should be < 1.0.')  # pragma: no cover
+        self.factor = factor
+
+        self.optimizer = optimizer
+        self.min_lr = min_lr
+        self.patience = patience
+        self.verbose = verbose
+        self.cooldown = cooldown
+        self.cooldown_counter = 0
+        self.mode = mode
+        self.threshold = threshold
+        self.threshold_mode = threshold_mode
+        self.best = None
+        self.num_bad_epochs = None
+        self.mode_worse = None  # the worse value for the chosen mode
+        self.eps = eps
+        self.last_epoch = 0
+        self._init_is_better(mode=mode, threshold=threshold, threshold_mode=threshold_mode)
+        self._reset()
+
+    def _reset(self) -> None:
+        """
+        Resets num_bad_epochs counter and cooldown counter.
+        """
+        self.best = self.mode_worse
+        self.cooldown_counter = 0
+        self.num_bad_epochs = 0
+
+    def on_epoch_end(self, epoch: int, loss: float, logs: dict = None) -> None:
+        """
+        Check conditions and update learning rate at the end of an epoch.
+
+        Args:
+            epoch (int): The current epoch number.
+            loss (float): Validation loss value.
+            logs (dict): The dictionary of logs from the epoch.
+        """
+        current = float(loss)
+
+        if self.is_better(current, self.best):
+            self.best = current
+            self.num_bad_epochs = 0
+        else:
+            self.num_bad_epochs += 1
+
+        if self.in_cooldown:
+            self.cooldown_counter -= 1
+            self.num_bad_epochs = 0  # Ignore any bad epochs in cooldown
+
+        if self.num_bad_epochs > self.patience:
+            self._reduce_lr(epoch)
+            self.cooldown_counter = self.cooldown
+            self.num_bad_epochs = 0
+            self.best = self.mode_worse
+
+    def _reduce_lr(self, epoch: int) -> None:
+        """
+        Reduce the learning rate for each parameter group.
+
+        Args:
+            epoch (int): The current epoch number.
+        """
+        old_lr = float(tf.keras.backend.get_value(self.optimizer.learning_rate))
+        new_lr = max(old_lr * self.factor, self.min_lr)
+        if old_lr - new_lr > self.eps:
+            tf.keras.backend.set_value(self.optimizer.learning_rate, new_lr)
+            if self.verbose:
+                print(f'Epoch {epoch:05d}: reducing learning rate to {new_lr:.4e}.')
+
+    @property
+    def in_cooldown(self) -> bool:
+        """
+        Check if the scheduler is in a cooldown period.
+
+        Returns:
+            bool: True if in cooldown period, False otherwise.
+        """
+        return self.cooldown_counter > 0
+
+    def is_better(self, a: float, best: Union[float, None]) -> bool:
+        """
+        Determine if the new value is better than the best value based on mode and threshold.
+
+        Args:
+            a (float): The new value to compare.
+            best (float): The best value to compare against.
+
+        Returns:
+            bool: True if the new value is better, False otherwise.
+        """
+        if best is None:
+            return True
+
+        if self.mode == 'min' and self.threshold_mode == 'rel':
+            rel_epsilon = 1. - self.threshold
+            return a < best * rel_epsilon
+        elif self.mode == 'min' and self.threshold_mode == 'abs':
+            return a < best - self.threshold
+        elif self.mode == 'max' and self.threshold_mode == 'rel':
+            rel_epsilon = self.threshold + 1.
+            return a > best * rel_epsilon
+        else:  # mode == 'max' and threshold_mode == 'abs':
+            return a > best + self.threshold
+
+    def _init_is_better(self, mode: str, threshold: float, threshold_mode: str) -> None:
+        """
+        Initialize the comparison function for determining if a new value is better.
+
+        Args:
+            mode (str): The mode for comparison, 'min' or 'max'.
+            threshold (float): The threshold for comparison.
+            threshold_mode (str): The mode for threshold, 'rel' or 'abs'.
+
+        Raises:
+            ValueError: If an unknown mode or threshold mode is provided.
+        """
+        if mode not in {'min', 'max'}:
+            Logger.critical(f'mode {mode} is unknown!')  # pragma: no cover
+        if threshold_mode not in {'rel', 'abs'}:
+            Logger.critical(f'threshold mode {threshold_mode} is unknown!')  # pragma: no cover
+
+        if mode == 'min':
+            self.mode_worse = float('inf')
+        else:  # mode == 'max':
+            self.mode_worse = float('-inf')
+
+        self.mode = mode
+        self.threshold = threshold
+        self.threshold_mode = threshold_mode
+
+    def get_config(self) -> Dict:
+        """
+        Return the configuration of the scheduler as a dictionary.
+
+        Returns:
+            Dict: The configuration of the scheduler.
+        """
+        config = {
+            'factor': self.factor,
+            'patience': self.patience,
+            'verbose': self.verbose,
+            'mode': self.mode,
+            'threshold': self.threshold,
+            'threshold_mode': self.threshold_mode,
+            'cooldown': self.cooldown,
+            'min_lr': self.min_lr,
+            'eps': self.eps
+        }
+        base_config = super(ReduceLROnPlateau, self).get_config()
+        return {**base_config, **config}
+
+    def set_config(self, config: Dict) -> None:
+        """
+        Set the configuration of the scheduler from a dictionary.
+
+        Args:
+            config (Dict): The configuration dictionary.
+        """
+        for key, value in config.items():
+            setattr(self, key, value)
+
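The Keras ReduceLROnPlateau callback added above mirrors the PyTorch scheduler of the same name: the loss is passed explicitly to on_epoch_end, and the wrapped optimizer's learning rate is multiplied by factor once the loss stops improving for more than patience epochs. A minimal usage sketch from a hand-rolled loop follows; the optimizer settings and loss values are illustrative, not defaults taken from the package:

import tensorflow as tf
from model_compression_toolkit.data_generation.keras.optimization_functions.lr_scheduler import ReduceLROnPlateau

optimizer = tf.keras.optimizers.Adam(learning_rate=16.0)
scheduler = ReduceLROnPlateau(optimizer=optimizer, mode='min', factor=0.5,
                              patience=5, min_lr=1e-4, verbose=True)

# Toy loss trace: the loss stops improving after the second step.
for i_iter, loss_value in enumerate([10.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0]):
    # ... compute gradients and apply them with `optimizer` here ...
    scheduler.on_epoch_end(epoch=i_iter, loss=loss_value)

print(float(optimizer.learning_rate))  # 8.0 once patience is exceeded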
model_compression_toolkit/data_generation/keras/optimization_functions/output_loss_functions.py CHANGED
@@ -22,7 +22,7 @@ from model_compression_toolkit.data_generation.keras.model_info_exctractors impo
 
 # Function to calculate the regularized min-max difference loss
 def regularized_min_max_diff(
-
+        model_outputs: tf.Tensor,
         activation_extractor: KerasActivationExtractor,
         tape: tf.GradientTape,
         eps: float = 1e-6,
@@ -33,7 +33,7 @@ def regularized_min_max_diff(
     This function calculates the regularized min-max difference loss based on the provided inputs.
 
     Args:
-
+        model_outputs (tf.Tensor): Output images or tensors.
         activation_extractor (KerasActivationExtractor): Activation extractor object.
         tape (tf.GradientTape): TensorFlow tape for recording operations.
         eps (float, optional): Small constant to prevent division by zero.
@@ -82,38 +82,63 @@ def regularized_min_max_diff(
     return output_loss
 
 
-def
-
+def inverse_min_max_diff(
+        model_outputs: tf.Tensor,
         eps: float = 1e-6,
         **kwargs) -> tf.Tensor:
     """
-    Calculate the
+    Calculate the inverse of the maximum - minimum difference of the model output on the input images.
 
     Args:
-
+        model_outputs (Tensor or List[Tensor]): The output of the model on images.
         eps (float): Small value for numerical stability.
         **kwargs: Additional keyword arguments.
 
     Returns:
         Tensor: The computed minimum-maximum difference loss.
     """
-    if not isinstance(
-
-    output_loss =
-    for output in
+    if not isinstance(model_outputs, (list, tuple)):
+        model_outputs = [model_outputs]
+    output_loss = tf.zeros(1)
+    for output in model_outputs:
         output = tf.reshape(output, [output.shape[0], -1])
         output_loss += 1 / (tf.reduce_max(output, 1) - tf.reduce_min(output, 1) + eps)
     return output_loss
 
+def negative_min_max_diff(
+        model_outputs: tf.Tensor,
+        eps: float = 1e-6,
+        **kwargs) -> tf.Tensor:
+    """
+    Calculate the inverse of the maximum - minimum difference of the model output on the input images.
+
+    Args:
+        model_outputs (Tensor or List[Tensor]): The output of the model on images.
+        eps (float): Small value for numerical stability.
+        **kwargs: Additional keyword arguments.
+
+    Returns:
+        Tensor: The computed minimum-maximum difference loss.
+    """
+    if not isinstance(model_outputs, (list, tuple)):
+        model_outputs = [model_outputs]
+    output_loss = tf.zeros(1)
+    for output in model_outputs:
+        output = tf.reshape(output, [output.shape[0], -1])
+        out_max = tf.reduce_max(output, 1)
+        out_min = tf.reduce_min(output, 1)
+        output_loss += tf.reduce_mean(-(out_max - out_min))
+    return output_loss
+
 
 def no_output_loss(
-
+        model_outputs: tf.Tensor,
         **kwargs) -> tf.Tensor:
     """
     Calculate no output loss.
 
     Args:
-
+        model_outputs (Tensor): The output of the model on images.
         **kwargs: Additional keyword arguments.
 
     Returns:
@@ -125,6 +150,7 @@ def no_output_loss(
 # Dictionary of output loss functions
 output_loss_function_dict: Dict[OutputLossType, Callable] = {
     OutputLossType.NONE: no_output_loss,
-    OutputLossType.
+    OutputLossType.NEGATIVE_MIN_MAX_DIFF: negative_min_max_diff,
+    OutputLossType.INVERSE_MIN_MAX_DIFF: inverse_min_max_diff,
     OutputLossType.REGULARIZED_MIN_MAX_DIFF: regularized_min_max_diff,
 }
model_compression_toolkit/data_generation/keras/optimization_functions/scheduler_step_functions.py CHANGED
@@ -18,104 +18,11 @@ from typing import Callable, Dict
 import numpy as np
 
 from model_compression_toolkit.data_generation.common.enums import SchedulerType
+from model_compression_toolkit.data_generation.keras.optimization_functions.lr_scheduler import \
+    ReduceLROnPlateau
 
 
-
-# Customized for gradient taping
-class CustomReduceLROnPlateau:
-    def __init__(self,
-                 factor: float = 0.5,
-                 patience: int = 10,
-                 min_delta: float = 1e-4,
-                 cooldown: int = 0,
-                 min_lr: float = 1e-6,
-                 sign_number: int = 4,
-                 optim_lr=None,
-                 ):
-        """
-        Initialize a custom learning rate scheduler based on ReduceLROnPlateau.
-
-        Args:
-            factor (float): Factor by which the learning rate will be reduced.
-            patience (int): Number of epochs with no improvement after which learning rate will be reduced.
-            min_delta (float): Minimum change in monitored value to qualify as an improvement.
-            cooldown (int): Number of epochs to wait before resuming after reducing the learning rate.
-            min_lr (float): Lower bound on the learning rate.
-            sign_number (int): Number of significant digits to consider for comparisons when checking for improvement.
-            optim_lr (tf.Variable): Optimizer learning rate variable to synchronize with the reduced learning rate.
-        """
-
-        self.optim_lr = optim_lr
-        self.factor = factor
-        self.min_lr = min_lr
-        self.min_delta = min_delta
-        self.patience = patience
-        self.cooldown = cooldown
-        self.cooldown_counter = 0
-        self.wait = 0
-        self.best = 0
-        self.monitor_op = None
-        self.sign_number = sign_number
-        self.reduce_lr = True
-        self._reset()
-
-    def _reset(self):
-        """
-        Reset the internal state of the learning rate scheduler.
-        """
-        self.monitor_op = lambda a, b: np.less(a, b - self.min_delta)
-        self.best = np.Inf
-        self.cooldown_counter = 0
-        self.wait = 0
-
-    def on_epoch_end(self,
-                     loss: float,
-                     logs=None):
-        """
-        Update the learning rate based on the validation loss at the end of each epoch.
-
-        Args:
-            loss (float): Validation loss value.
-            logs (dict): Dictionary of training metrics and logs.
-
-        Notes:
-            This method should be called at the end of each epoch during training.
-        """
-        logs = logs or {}
-        logs['lr'] = float(self.optim_lr.learning_rate.numpy())
-        current = float(loss)
-
-        if self.in_cooldown():
-            self.cooldown_counter -= 1
-            self.wait = 0
-
-        if self.monitor_op(current, self.best):
-            self.best = current
-            self.wait = 0
-        elif not self.in_cooldown():
-            self.wait += 1
-            if self.wait >= self.patience:
-
-                old_lr = float(self.optim_lr.learning_rate.numpy())
-                if old_lr > self.min_lr and self.reduce_lr:
-                    new_lr = old_lr * self.factor
-
-                    new_lr = max(new_lr, self.min_lr)
-                    self.optim_lr.learning_rate.assign(new_lr)
-                    self.cooldown_counter = self.cooldown
-                    self.wait = 0
-
-    def in_cooldown(self) -> bool:
-        """
-        Check if the learning rate scheduler is in the cooldown phase.
-
-        Returns:
-            bool: True if in cooldown, False otherwise.
-        """
-        return self.cooldown_counter > 0
-
-
-def get_reduceonplatue_scheduler(n_iter: int, initial_lr: float):
+def get_reduce_lr_on_plateau_scheduler(n_iter: int, initial_lr: float):
     """
     Create a custom ReduceLROnPlateau learning rate scheduler.
 
@@ -126,10 +33,11 @@ def get_reduceonplatue_scheduler(n_iter: int, initial_lr: float):
     Returns:
         callable: Partial function for creating CustomReduceLROnPlateau scheduler.
     """
-    return partial(
+    return partial(ReduceLROnPlateau, min_lr=1e-4, factor=0.5, patience=int(n_iter / 50))
+
 
 
 # Define a dictionary that maps scheduler types to functions for creating schedulers.
 scheduler_step_function_dict: Dict[SchedulerType, Callable] = {
-    SchedulerType.REDUCE_ON_PLATEAU:
+    SchedulerType.REDUCE_ON_PLATEAU: get_reduce_lr_on_plateau_scheduler,  # ReduceLROnPlateau scheduler.
 }
model_compression_toolkit/data_generation/keras/optimization_utils.py CHANGED
@@ -25,6 +25,7 @@ from model_compression_toolkit.data_generation.common.optimization_utils import
     BatchOptimizationHolder, AllImagesStatsHolder, BatchStatsHolder
 from model_compression_toolkit.data_generation.keras.constants import IMAGE_MIN_VAL, IMAGE_MAX_VAL, BATCH_AXIS, \
     H_AXIS, W_AXIS
+from model_compression_toolkit.data_generation.keras.image_operations import create_valid_grid
 from model_compression_toolkit.data_generation.keras.model_info_exctractors import KerasActivationExtractor, \
     KerasOriginalBNStatsHolder
 
@@ -64,11 +65,7 @@ class KerasImagesOptimizationHandler(ImagesOptimizationHandler):
         # IMAGE_MIN_VAL( default set to 0) - IMAGE_MAX_VAL ( default set to 255) before normalization
         self.normalization_mean = normalization_mean
         self.normalization_std = normalization_std
-        self.valid_grid =
-        for i, (mean, var) in enumerate(zip(self.normalization_mean, self.normalization_std)):
-            min_val = (IMAGE_MIN_VAL - mean) / var
-            max_val = (IMAGE_MAX_VAL - mean) / var
-            self.valid_grid.append((min_val, max_val))
+        self.valid_grid = create_valid_grid(self.normalization_mean, self.normalization_std)
 
         super(KerasImagesOptimizationHandler, self).__init__(
                                                              data_gen_batch_size=
@@ -83,8 +80,6 @@ class KerasImagesOptimizationHandler(ImagesOptimizationHandler):
                                                              initial_lr=data_generation_config.initial_lr,
                                                              normalization_mean=self.normalization_mean,
                                                              normalization_std=self.normalization_std,
-                                                             clip_images=data_generation_config.clip_images,
-                                                             reflection=data_generation_config.reflection,
                                                              eps=eps)
 
         # Set the mean axis based on the image granularity
@@ -140,22 +135,14 @@ class KerasImagesOptimizationHandler(ImagesOptimizationHandler):
         Returns:
             tf.Tensor: Clipped and reflected tensor.
         """
-
-
-
-
-
-
-
-
-                    images[:, :, :, i_ch] = 2 * clamp - z[:, :, :, i_ch]
-                else:
-                    images[:, :, :, i_ch] = clamp
-            # Assign the clipped reflected values back to `z`.
-            z.assign(images)
-            return z
-        else:
-            return z
+        images = z.numpy()
+        for i_ch in range(len(self.valid_grid)):
+            # Clip the values of the channel within the valid range.
+            clamp = tf.clip_by_value(t=z[:, :, :, i_ch], clip_value_min=self.valid_grid[i_ch][0],
+                                     clip_value_max=self.valid_grid[i_ch][1])
+            # Assign the clipped reflected values back to `z`.
+            z.assign(images)
+        return z
 
     def get_layer_accumulated_stats(self, layer_name: str) -> Tuple[tf.Tensor, tf.Tensor]:
         """
@@ -255,7 +242,7 @@ class KerasImagesOptimizationHandler(ImagesOptimizationHandler):
                           images: tf.Tensor,
                           gradients: tf.Tensor,
                           loss: tf.Tensor,
-
+                          i_iter: int):
         """
         Perform an optimization step.
 
@@ -264,7 +251,7 @@ class KerasImagesOptimizationHandler(ImagesOptimizationHandler):
             images (tf.Tensor): The images to optimize for the batch.
             gradients (List[tf.Tensor]): The gradients calculated for the images.
             loss (tf.Tensor): Loss value.
-
+            i_iter (int): Current optimization iteration.
         """
         # Get optimizer and scheduler for the specific batch index
         optimizer = self.get_optimizer_by_batch_index(batch_index=batch_index)
@@ -274,7 +261,7 @@ class KerasImagesOptimizationHandler(ImagesOptimizationHandler):
         optimizer.apply_gradients(zip(gradients, [images]))
 
         # Perform scheduler step
-        scheduler.on_epoch_end(loss=tf.reduce_mean(loss))
+        scheduler.on_epoch_end(epoch=i_iter, loss=tf.reduce_mean(loss))
 
     def get_finilized_data_loader(self) -> np.ndarray:
         """
@@ -321,8 +308,8 @@ class KerasBatchOptimizationHolder(BatchOptimizationHolder):
             initial_lr (float): The initial learning rate used by the optimizer.
         """
         self.images = images
-        self.optimizer = optimizer(
-        self.scheduler = scheduler(
+        self.optimizer = optimizer(learning_rate=initial_lr)
+        self.scheduler = scheduler(optimizer=self.optimizer)
 
 
 class KerasAllImagesStatsHolder(AllImagesStatsHolder):
model_compression_toolkit/data_generation/pytorch/constants.py CHANGED
@@ -20,8 +20,11 @@ BATCH_AXIS, CHANNEL_AXIS, H_AXIS, W_AXIS = 0, 1, 2, 3
 # Default initial learning rate constant.
 DEFAULT_PYTORCH_INITIAL_LR = 16
 
+# Default extra pixels for image padding.
+DEFAULT_PYTORCH_EXTRA_PIXELS = 32
+
 # Default output loss multiplier.
-DEFAULT_PYTORCH_OUTPUT_LOSS_MULTIPLIER = 1e-
+DEFAULT_PYTORCH_OUTPUT_LOSS_MULTIPLIER = 1e-5
 
 # Default BatchNorm layer types
 DEFAULT_PYTORCH_BN_LAYER_TYPES = [torch.nn.BatchNorm2d]
model_compression_toolkit/data_generation/pytorch/image_operations.py ADDED
@@ -0,0 +1,105 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from typing import List
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from model_compression_toolkit.core.pytorch.pytorch_device_config import get_working_device
+from torchvision.transforms import Normalize
+
+from model_compression_toolkit.logger import Logger
+
+
+def create_valid_grid(means: List[int], stds: List[int]) -> torch.Tensor:
+    """
+    Create a valid grid for image normalization.
+
+    Args:
+        means (List[int]): List of mean values per channel.
+        stds (List[int]): List of standard deviation values per channel.
+
+    Returns:
+        torch.Tensor: The valid grid for image normalization.
+    """
+    # Image valid grid
+    pixel_grid = torch.from_numpy(np.array(list(range(256))).repeat(3).reshape(-1, 3)).float()
+    valid_grid = Normalize(mean=means, std=stds)(pixel_grid.transpose(1, 0)[None, :, :, None]).squeeze().to(
+        get_working_device())
+    return valid_grid
+
+
+class Smoothing(torch.nn.Module):
+    """
+    A PyTorch module for applying Gaussian smoothing to an image.
+    """
+
+    def __init__(self, size: int = 3, sigma: float = 1.25, kernel: torch.Tensor = None):
+        """
+        Initialize the Smoothing module.
+
+        Args:
+            size (int): The size of the Gaussian kernel (Default: 3).
+            sigma (float): The standard deviation of the Gaussian kernel (Defalut: 1.25).
+            kernel (torch.Tensor, optional): Precomputed Gaussian kernel. If None, it will be created.
+        """
+        super().__init__()
+        if kernel is None:
+            kernel = self.gaussian_kernel(size, sigma)
+        if kernel.dim() != 2:
+            Logger.critical("Kernel must have 2 dimensions. Found {} dimensions.".format(kernel.dim()))  # pragma: no cover
+        kernel = kernel.view(1, 1, kernel.shape[0], kernel.shape[1])
+        # Repeat for 3 color channels
+        kernel = kernel.repeat(3, 1, 1, 1)
+        self.kernel = kernel
+
+    def forward(self, image: torch.Tensor) -> torch.Tensor:
+        """
+        Apply Gaussian smoothing to the input image.
+
+        Args:
+            image (torch.Tensor): The input image tensor.
+
+        Returns:
+            torch.Tensor: The smoothed image tensor.
+        """
+        return F.conv2d(image, self.kernel.to(image.device), padding=self.kernel.shape[-1] // 2, groups=3)
+
+    def __repr__(self) -> str:
+        """
+        Return the string representation of the Smoothing module.
+
+        Returns:
+            str: String representation of the Smoothing module.
+        """
+        return f"{self.__class__.__name__}(kernel={self.kernel.shape[-1]})"  # pragma: no cover
+
+    @staticmethod
+    def gaussian_kernel(size: int = 3, sigma: float = 1) -> torch.Tensor:
+        """
+        Create a Gaussian kernel.
+
+        Args:
+            size (int): The size of the Gaussian kernel.
+            sigma (float): The standard deviation of the Gaussian kernel.
+
+        Returns:
+            torch.Tensor: The Gaussian kernel tensor.
+        """
+        axis = torch.arange(-size // 2 + 1., size // 2 + 1.)
+        x, y = torch.meshgrid(axis, axis)
+        kernel = torch.exp(-(x ** 2 + y ** 2) / (2 * sigma ** 2))
+        kernel = kernel / torch.sum(kernel)
+        return kernel