ilovetools 0.2.23__tar.gz → 0.2.24__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ilovetools-0.2.23/ilovetools.egg-info → ilovetools-0.2.24}/PKG-INFO +2 -2
- ilovetools-0.2.24/ilovetools/ml/lr_schedulers.py +697 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24/ilovetools.egg-info}/PKG-INFO +2 -2
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools.egg-info/SOURCES.txt +2 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/pyproject.toml +2 -2
- {ilovetools-0.2.23 → ilovetools-0.2.24}/setup.py +2 -2
- ilovetools-0.2.24/tests/test_lr_schedulers.py +522 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/LICENSE +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/MANIFEST.in +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/README.md +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/__init__.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ai/__init__.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ai/embeddings.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ai/inference.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ai/llm_helpers.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/audio/__init__.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/automation/__init__.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/automation/file_organizer.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/conversion/__init__.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/conversion/config_converter.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/conversion/config_converter_fixed_header.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/data/__init__.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/data/feature_engineering.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/data/preprocessing.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/database/__init__.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/datetime/__init__.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/email/__init__.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/email/template_engine.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/files/__init__.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/image/__init__.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/__init__.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/activations.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/anomaly_detection.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/attention.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/clustering.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/cnn.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/cross_validation.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/dimensionality.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/ensemble.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/feature_selection.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/gradient_descent.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/imbalanced.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/interpretation.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/loss_functions.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/metrics.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/neural_network.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/normalization.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/normalization_advanced.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/optimizers.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/pipeline.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/positional_encoding.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/regularization.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/rnn.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/timeseries.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/tuning.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/security/__init__.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/security/password_checker.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/text/__init__.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/utils/__init__.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/utils/cache_system.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/utils/logger.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/utils/rate_limiter.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/utils/retry.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/validation/__init__.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/validation/data_validator.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/web/__init__.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/web/scraper.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/web/url_shortener.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools.egg-info/dependency_links.txt +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools.egg-info/requires.txt +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools.egg-info/top_level.txt +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/requirements.txt +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/setup.cfg +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/__init__.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_activations.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_attention.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_cnn.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_gradient_descent.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_loss_functions.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_neural_network.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_normalization.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_normalization_advanced.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_optimizers.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_positional_encoding.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_pypi_installation.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_regularization.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_rnn.py +0 -0
- {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/verify_positional_encoding.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ilovetools
-Version: 0.2.23
+Version: 0.2.24
 Summary: A comprehensive Python utility library with modular tools for AI/ML, data processing, and daily programming needs
 Home-page: https://github.com/AliMehdi512/ilovetools
 Author: Ali Mehdi
@@ -11,7 +11,7 @@ Project-URL: Repository, https://github.com/AliMehdi512/ilovetools
 Project-URL: Issues, https://github.com/AliMehdi512/ilovetools/issues
 Project-URL: Bug Reports, https://github.com/AliMehdi512/ilovetools/issues
 Project-URL: Source, https://github.com/AliMehdi512/ilovetools
-Keywords: utilities,tools,ai,ml,data-processing,automation,
+Keywords: utilities,tools,ai,ml,data-processing,automation,learning-rate-schedulers,optimization,adaptive-learning-rate,cosine-annealing,onecycle,sgdr,deep-learning,nlp
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers
 Classifier: Topic :: Software Development :: Libraries :: Python Modules

ilovetools-0.2.24/ilovetools/ml/lr_schedulers.py (new file)
@@ -0,0 +1,697 @@
+"""
+Learning Rate Schedulers and Advanced Optimization Techniques Module
+
+This module provides comprehensive implementations of learning rate scheduling
+strategies and advanced optimization techniques for training deep learning models.
+
+Features:
+- Step Decay Scheduler
+- Exponential Decay Scheduler
+- Cosine Annealing Scheduler
+- Cosine Annealing with Warm Restarts (SGDR)
+- One Cycle Policy (Super-Convergence)
+- Reduce on Plateau Scheduler
+- Polynomial Decay Scheduler
+- Linear Warmup Scheduler
+- Cyclical Learning Rate
+- Learning Rate Finder
+
+Author: Ali Mehdi
+License: MIT
+"""
+
+import numpy as np
+from typing import Optional, Callable, List, Tuple
+
+
+# ============================================================================
+# LEARNING RATE SCHEDULERS
+# ============================================================================
+
+class StepLRScheduler:
+    """
+    Step Learning Rate Scheduler
+
+    Decays the learning rate by gamma every step_size epochs.
+    Commonly used in ResNet, VGG, and other classic architectures.
+
+    Args:
+        initial_lr: Initial learning rate
+        step_size: Period of learning rate decay (in epochs)
+        gamma: Multiplicative factor of learning rate decay
+    """
+
+    def __init__(self, initial_lr: float, step_size: int, gamma: float = 0.1):
+        self.initial_lr = initial_lr
+        self.step_size = step_size
+        self.gamma = gamma
+        self.current_epoch = 0
+        self.current_lr = initial_lr
+
+    def step(self, epoch: Optional[int] = None) -> float:
+        """
+        Update learning rate for the next epoch
+
+        Args:
+            epoch: Current epoch number (optional)
+
+        Returns:
+            Updated learning rate
+        """
+        if epoch is not None:
+            self.current_epoch = epoch
+        else:
+            self.current_epoch += 1
+
+        self.current_lr = self.initial_lr * (self.gamma ** (self.current_epoch // self.step_size))
+        return self.current_lr
+
+    def get_lr(self) -> float:
+        """Get current learning rate"""
+        return self.current_lr
+
+
+class ExponentialLRScheduler:
+    """
+    Exponential Learning Rate Scheduler
+
+    Decays the learning rate exponentially: lr = lr_0 * gamma^epoch
+    Provides smooth, continuous decay.
+
+    Args:
+        initial_lr: Initial learning rate
+        gamma: Multiplicative factor of learning rate decay (typically 0.95-0.99)
+    """
+
+    def __init__(self, initial_lr: float, gamma: float = 0.95):
+        self.initial_lr = initial_lr
+        self.gamma = gamma
+        self.current_epoch = 0
+        self.current_lr = initial_lr
+
+    def step(self, epoch: Optional[int] = None) -> float:
+        """Update learning rate"""
+        if epoch is not None:
+            self.current_epoch = epoch
+        else:
+            self.current_epoch += 1
+
+        self.current_lr = self.initial_lr * (self.gamma ** self.current_epoch)
+        return self.current_lr
+
+    def get_lr(self) -> float:
+        """Get current learning rate"""
+        return self.current_lr
+
+
+class CosineAnnealingLR:
+    """
+    Cosine Annealing Learning Rate Scheduler
+
+    Sets the learning rate using a cosine annealing schedule:
+    lr = lr_min + 0.5 * (lr_max - lr_min) * (1 + cos(pi * epoch / T_max))
+
+    Used in modern transformers and state-of-the-art models.
+
+    Args:
+        initial_lr: Maximum learning rate
+        T_max: Maximum number of iterations
+        eta_min: Minimum learning rate (default: 0)
+    """
+
+    def __init__(self, initial_lr: float, T_max: int, eta_min: float = 0):
+        self.initial_lr = initial_lr
+        self.T_max = T_max
+        self.eta_min = eta_min
+        self.current_epoch = 0
+        self.current_lr = initial_lr
+
+    def step(self, epoch: Optional[int] = None) -> float:
+        """Update learning rate"""
+        if epoch is not None:
+            self.current_epoch = epoch
+        else:
+            self.current_epoch += 1
+
+        self.current_lr = self.eta_min + (self.initial_lr - self.eta_min) * \
+            (1 + np.cos(np.pi * self.current_epoch / self.T_max)) / 2
+        return self.current_lr
+
+    def get_lr(self) -> float:
+        """Get current learning rate"""
+        return self.current_lr
+
+
+class CosineAnnealingWarmRestarts:
+    """
+    Cosine Annealing with Warm Restarts (SGDR)
+
+    Implements SGDR: Stochastic Gradient Descent with Warm Restarts.
+    Periodically resets the learning rate to help escape local minima.
+
+    Args:
+        initial_lr: Maximum learning rate
+        T_0: Number of iterations for the first restart
+        T_mult: Factor to increase T_i after each restart (default: 1)
+        eta_min: Minimum learning rate (default: 0)
+    """
+
+    def __init__(self, initial_lr: float, T_0: int, T_mult: int = 1, eta_min: float = 0):
+        self.initial_lr = initial_lr
+        self.T_0 = T_0
+        self.T_mult = T_mult
+        self.eta_min = eta_min
+        self.current_epoch = 0
+        self.T_cur = 0
+        self.T_i = T_0
+        self.current_lr = initial_lr
+
+    def step(self, epoch: Optional[int] = None) -> float:
+        """Update learning rate"""
+        if epoch is None:
+            epoch = self.current_epoch + 1
+
+        self.current_epoch = epoch
+        self.T_cur = epoch % self.T_i
+
+        # Check if we need to restart
+        if self.T_cur == 0 and epoch > 0:
+            self.T_i = self.T_i * self.T_mult
+
+        self.current_lr = self.eta_min + (self.initial_lr - self.eta_min) * \
+            (1 + np.cos(np.pi * self.T_cur / self.T_i)) / 2
+        return self.current_lr
+
+    def get_lr(self) -> float:
+        """Get current learning rate"""
+        return self.current_lr
+
+
+class OneCycleLR:
+    """
+    One Cycle Learning Rate Policy
+
+    Implements the One Cycle Policy for super-convergence.
+    Single cycle: warmup -> peak -> decay
+
+    Can significantly reduce training time while improving performance.
+
+    Args:
+        max_lr: Maximum learning rate
+        total_steps: Total number of training steps
+        pct_start: Percentage of cycle spent increasing LR (default: 0.3)
+        anneal_strategy: 'cos' or 'linear' (default: 'cos')
+        div_factor: Initial LR = max_lr / div_factor (default: 25)
+        final_div_factor: Final LR = max_lr / final_div_factor (default: 10000)
+    """
+
+    def __init__(
+        self,
+        max_lr: float,
+        total_steps: int,
+        pct_start: float = 0.3,
+        anneal_strategy: str = 'cos',
+        div_factor: float = 25.0,
+        final_div_factor: float = 10000.0
+    ):
+        self.max_lr = max_lr
+        self.total_steps = total_steps
+        self.pct_start = pct_start
+        self.anneal_strategy = anneal_strategy
+        self.initial_lr = max_lr / div_factor
+        self.final_lr = max_lr / final_div_factor
+
+        self.step_size_up = int(total_steps * pct_start)
+        self.step_size_down = total_steps - self.step_size_up
+
+        self.current_step = 0
+        self.current_lr = self.initial_lr
+
+    def step(self) -> float:
+        """Update learning rate for next step"""
+        self.current_step += 1
+
+        if self.current_step <= self.step_size_up:
+            # Warmup phase
+            pct = self.current_step / self.step_size_up
+            self.current_lr = self.initial_lr + (self.max_lr - self.initial_lr) * pct
+        else:
+            # Annealing phase
+            pct = (self.current_step - self.step_size_up) / self.step_size_down
+
+            if self.anneal_strategy == 'cos':
+                self.current_lr = self.final_lr + (self.max_lr - self.final_lr) * \
+                    (1 + np.cos(np.pi * pct)) / 2
+            else:  # linear
+                self.current_lr = self.max_lr - (self.max_lr - self.final_lr) * pct
+
+        return self.current_lr
+
+    def get_lr(self) -> float:
+        """Get current learning rate"""
+        return self.current_lr
+
+
+class ReduceLROnPlateau:
+    """
+    Reduce Learning Rate on Plateau
+
+    Reduces learning rate when a metric has stopped improving.
+    Adaptive scheduler based on validation performance.
+
+    Args:
+        initial_lr: Initial learning rate
+        mode: 'min' or 'max' (default: 'min')
+        factor: Factor by which LR will be reduced (default: 0.1)
+        patience: Number of epochs with no improvement (default: 10)
+        threshold: Threshold for measuring improvement (default: 1e-4)
+        min_lr: Minimum learning rate (default: 0)
+    """
+
+    def __init__(
+        self,
+        initial_lr: float,
+        mode: str = 'min',
+        factor: float = 0.1,
+        patience: int = 10,
+        threshold: float = 1e-4,
+        min_lr: float = 0
+    ):
+        self.initial_lr = initial_lr
+        self.mode = mode
+        self.factor = factor
+        self.patience = patience
+        self.threshold = threshold
+        self.min_lr = min_lr
+
+        self.current_lr = initial_lr
+        self.best_value = np.inf if mode == 'min' else -np.inf
+        self.num_bad_epochs = 0
+        self.cooldown_counter = 0
+
+    def step(self, metric: float) -> float:
+        """
+        Update learning rate based on metric
+
+        Args:
+            metric: Current metric value (e.g., validation loss)
+
+        Returns:
+            Updated learning rate
+        """
+        if self.cooldown_counter > 0:
+            self.cooldown_counter -= 1
+            return self.current_lr
+
+        # Check if metric improved
+        if self.mode == 'min':
+            improved = metric < self.best_value - self.threshold
+        else:
+            improved = metric > self.best_value + self.threshold
+
+        if improved:
+            self.best_value = metric
+            self.num_bad_epochs = 0
+        else:
+            self.num_bad_epochs += 1
+
+        # Reduce LR if no improvement for patience epochs
+        if self.num_bad_epochs >= self.patience:
+            self.current_lr = max(self.current_lr * self.factor, self.min_lr)
+            self.num_bad_epochs = 0
+            self.cooldown_counter = self.patience
+
+        return self.current_lr
+
+    def get_lr(self) -> float:
+        """Get current learning rate"""
+        return self.current_lr
+
+
+class PolynomialLRScheduler:
+    """
+    Polynomial Learning Rate Decay
+
+    Decays learning rate using polynomial function.
+    Used in BERT and other transformer models.
+
+    Args:
+        initial_lr: Initial learning rate
+        total_steps: Total number of training steps
+        power: Polynomial power (default: 1.0 for linear)
+        end_lr: Minimum learning rate (default: 0)
+    """
+
+    def __init__(
+        self,
+        initial_lr: float,
+        total_steps: int,
+        power: float = 1.0,
+        end_lr: float = 0
+    ):
+        self.initial_lr = initial_lr
+        self.total_steps = total_steps
+        self.power = power
+        self.end_lr = end_lr
+        self.current_step = 0
+        self.current_lr = initial_lr
+
+    def step(self) -> float:
+        """Update learning rate"""
+        self.current_step += 1
+
+        if self.current_step >= self.total_steps:
+            self.current_lr = self.end_lr
+        else:
+            decay_factor = (1 - self.current_step / self.total_steps) ** self.power
+            self.current_lr = (self.initial_lr - self.end_lr) * decay_factor + self.end_lr
+
+        return self.current_lr
+
+    def get_lr(self) -> float:
+        """Get current learning rate"""
+        return self.current_lr
+
+
+class LinearWarmupScheduler:
+    """
+    Linear Warmup Scheduler
+
+    Linearly increases learning rate from 0 to target over warmup steps.
+    Often combined with other schedulers for stable training start.
+
+    Args:
+        target_lr: Target learning rate after warmup
+        warmup_steps: Number of warmup steps
+    """
+
+    def __init__(self, target_lr: float, warmup_steps: int):
+        self.target_lr = target_lr
+        self.warmup_steps = warmup_steps
+        self.current_step = 0
+        self.current_lr = 0
+
+    def step(self) -> float:
+        """Update learning rate"""
+        self.current_step += 1
+
+        if self.current_step >= self.warmup_steps:
+            self.current_lr = self.target_lr
+        else:
+            self.current_lr = self.target_lr * (self.current_step / self.warmup_steps)
+
+        return self.current_lr
+
+    def get_lr(self) -> float:
+        """Get current learning rate"""
+        return self.current_lr
+
+
+class CyclicalLR:
+    """
+    Cyclical Learning Rate
+
+    Cycles learning rate between base_lr and max_lr.
+    Helps explore loss landscape and escape local minima.
+
+    Args:
+        base_lr: Minimum learning rate
+        max_lr: Maximum learning rate
+        step_size: Half cycle length (in steps)
+        mode: 'triangular', 'triangular2', or 'exp_range'
+        gamma: Decay constant for exp_range mode (default: 1.0)
+    """
+
+    def __init__(
+        self,
+        base_lr: float,
+        max_lr: float,
+        step_size: int,
+        mode: str = 'triangular',
+        gamma: float = 1.0
+    ):
+        self.base_lr = base_lr
+        self.max_lr = max_lr
+        self.step_size = step_size
+        self.mode = mode
+        self.gamma = gamma
+
+        self.current_step = 0
+        self.cycle = 0
+        self.current_lr = base_lr
+
+    def step(self) -> float:
+        """Update learning rate"""
+        self.current_step += 1
+        self.cycle = np.floor(1 + self.current_step / (2 * self.step_size))
+        x = np.abs(self.current_step / self.step_size - 2 * self.cycle + 1)
+
+        if self.mode == 'triangular':
+            scale_factor = 1.0
+        elif self.mode == 'triangular2':
+            scale_factor = 1 / (2 ** (self.cycle - 1))
+        else:  # exp_range
+            scale_factor = self.gamma ** self.current_step
+
+        self.current_lr = self.base_lr + (self.max_lr - self.base_lr) * \
+            max(0, (1 - x)) * scale_factor
+
+        return self.current_lr
+
+    def get_lr(self) -> float:
+        """Get current learning rate"""
+        return self.current_lr
+
+
+class LRFinder:
+    """
+    Learning Rate Finder
+
+    Finds optimal learning rate by gradually increasing LR and
+    monitoring loss. Helps determine good initial learning rate.
+
+    Based on Leslie Smith's LR range test.
+
+    Args:
+        start_lr: Starting learning rate (default: 1e-7)
+        end_lr: Ending learning rate (default: 10)
+        num_steps: Number of steps for the test (default: 100)
+    """
+
+    def __init__(
+        self,
+        start_lr: float = 1e-7,
+        end_lr: float = 10,
+        num_steps: int = 100
+    ):
+        self.start_lr = start_lr
+        self.end_lr = end_lr
+        self.num_steps = num_steps
+
+        self.current_step = 0
+        self.current_lr = start_lr
+        self.lr_history = []
+        self.loss_history = []
+
+        # Calculate multiplicative factor
+        self.mult_factor = (end_lr / start_lr) ** (1 / num_steps)
+
+    def step(self, loss: float) -> float:
+        """
+        Update learning rate and record loss
+
+        Args:
+            loss: Current training loss
+
+        Returns:
+            Updated learning rate
+        """
+        self.lr_history.append(self.current_lr)
+        self.loss_history.append(loss)
+
+        self.current_step += 1
+        self.current_lr = self.start_lr * (self.mult_factor ** self.current_step)
+
+        return self.current_lr
+
+    def get_lr(self) -> float:
+        """Get current learning rate"""
+        return self.current_lr
+
+    def plot_results(self) -> Tuple[List[float], List[float]]:
+        """
+        Get LR and loss history for plotting
+
+        Returns:
+            Tuple of (lr_history, loss_history)
+        """
+        return self.lr_history, self.loss_history
+
+    def suggest_lr(self) -> float:
+        """
+        Suggest optimal learning rate based on loss curve
+
+        Returns:
+            Suggested learning rate
+        """
+        if len(self.loss_history) < 2:
+            return self.start_lr
+
+        # Find LR with steepest negative gradient
+        losses = np.array(self.loss_history)
+        lrs = np.array(self.lr_history)
+
+        # Smooth losses
+        window = min(5, len(losses) // 10)
+        if window > 1:
+            losses = np.convolve(losses, np.ones(window)/window, mode='valid')
+            lrs = lrs[:len(losses)]
+
+        # Find steepest descent
+        gradients = np.gradient(losses)
+        min_gradient_idx = np.argmin(gradients)
+
+        # Suggest LR at steepest descent
+        suggested_lr = lrs[min_gradient_idx]
+
+        return suggested_lr
+
+
+# ============================================================================
+# COMBINED SCHEDULERS
+# ============================================================================
+
+class WarmupCosineScheduler:
+    """
+    Warmup + Cosine Annealing Scheduler
+
+    Combines linear warmup with cosine annealing.
+    Common in transformer training (BERT, GPT, etc.)
+
+    Args:
+        max_lr: Maximum learning rate
+        warmup_steps: Number of warmup steps
+        total_steps: Total number of training steps
+        min_lr: Minimum learning rate (default: 0)
+    """
+
+    def __init__(
+        self,
+        max_lr: float,
+        warmup_steps: int,
+        total_steps: int,
+        min_lr: float = 0
+    ):
+        self.max_lr = max_lr
+        self.warmup_steps = warmup_steps
+        self.total_steps = total_steps
+        self.min_lr = min_lr
+
+        self.current_step = 0
+        self.current_lr = 0
+
+    def step(self) -> float:
+        """Update learning rate"""
+        self.current_step += 1
+
+        if self.current_step <= self.warmup_steps:
+            # Warmup phase
+            self.current_lr = self.max_lr * (self.current_step / self.warmup_steps)
+        else:
+            # Cosine annealing phase
+            progress = (self.current_step - self.warmup_steps) / \
+                (self.total_steps - self.warmup_steps)
+            self.current_lr = self.min_lr + (self.max_lr - self.min_lr) * \
+                (1 + np.cos(np.pi * progress)) / 2
+
+        return self.current_lr
+
+    def get_lr(self) -> float:
+        """Get current learning rate"""
+        return self.current_lr
+
+
+# ============================================================================
+# UTILITY FUNCTIONS
+# ============================================================================
+
+def get_scheduler(
+    scheduler_name: str,
+    initial_lr: float,
+    **kwargs
+) -> object:
+    """
+    Factory function to create scheduler by name
+
+    Args:
+        scheduler_name: Name of the scheduler
+        initial_lr: Initial learning rate
+        **kwargs: Additional scheduler-specific arguments
+
+    Returns:
+        Scheduler instance
+    """
+    schedulers = {
+        'step': StepLRScheduler,
+        'exponential': ExponentialLRScheduler,
+        'cosine': CosineAnnealingLR,
+        'cosine_restarts': CosineAnnealingWarmRestarts,
+        'onecycle': OneCycleLR,
+        'plateau': ReduceLROnPlateau,
+        'polynomial': PolynomialLRScheduler,
+        'warmup': LinearWarmupScheduler,
+        'cyclical': CyclicalLR,
+        'warmup_cosine': WarmupCosineScheduler,
+    }
+
+    if scheduler_name not in schedulers:
+        raise ValueError(f"Unknown scheduler: {scheduler_name}")
+
+    return schedulers[scheduler_name](initial_lr, **kwargs)
+
+
+# ============================================================================
+# ALIASES FOR CONVENIENCE
+# ============================================================================
+
+step_lr = StepLRScheduler
+exp_lr = ExponentialLRScheduler
+cosine_lr = CosineAnnealingLR
+sgdr = CosineAnnealingWarmRestarts
+onecycle = OneCycleLR
+plateau_lr = ReduceLROnPlateau
+poly_lr = PolynomialLRScheduler
+warmup_lr = LinearWarmupScheduler
+cyclical_lr = CyclicalLR
+lr_finder = LRFinder
+warmup_cosine = WarmupCosineScheduler
+
+
+__all__ = [
+    # Scheduler Classes
+    'StepLRScheduler',
+    'ExponentialLRScheduler',
+    'CosineAnnealingLR',
+    'CosineAnnealingWarmRestarts',
+    'OneCycleLR',
+    'ReduceLROnPlateau',
+    'PolynomialLRScheduler',
+    'LinearWarmupScheduler',
+    'CyclicalLR',
+    'LRFinder',
+    'WarmupCosineScheduler',
+    # Utility Functions
+    'get_scheduler',
+    # Aliases
+    'step_lr',
+    'exp_lr',
+    'cosine_lr',
+    'sgdr',
+    'onecycle',
+    'plateau_lr',
+    'poly_lr',
+    'warmup_lr',
+    'cyclical_lr',
+    'lr_finder',
+    'warmup_cosine',
+]
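
A minimal usage sketch for the schedulers added in this release. The import path follows the file location shown in the diff above (ilovetools/ml/lr_schedulers.py) and assumes ilovetools 0.2.24 is installed; the toy loop, step counts, and loss values are illustrative only and are not part of the package or its tests.

# Illustrative only: drives the 0.2.24 schedulers with made-up numbers.
from ilovetools.ml.lr_schedulers import (
    WarmupCosineScheduler,
    ReduceLROnPlateau,
    get_scheduler,
)

# Linear warmup to 3e-4 over 100 steps, then cosine decay to 0 by step 1000.
sched = WarmupCosineScheduler(max_lr=3e-4, warmup_steps=100, total_steps=1000)
for _ in range(1000):
    lr = sched.step()  # feed this value to your optimizer each step

# Plateau-based decay driven by a validation metric: after 3 epochs without
# improvement the LR is halved (illustrative loss values).
plateau = ReduceLROnPlateau(initial_lr=1e-3, mode='min', factor=0.5, patience=3)
for val_loss in [0.90, 0.80, 0.79, 0.79, 0.79, 0.79]:
    lr = plateau.step(val_loss)

# The factory builds a scheduler by name and forwards extra keyword arguments.
cosine = get_scheduler('cosine', initial_lr=1e-3, T_max=50)
print(cosine.step(), plateau.get_lr(), sched.get_lr())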