ilovetools 0.2.23__tar.gz → 0.2.24__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. {ilovetools-0.2.23/ilovetools.egg-info → ilovetools-0.2.24}/PKG-INFO +2 -2
  2. ilovetools-0.2.24/ilovetools/ml/lr_schedulers.py +697 -0
  3. {ilovetools-0.2.23 → ilovetools-0.2.24/ilovetools.egg-info}/PKG-INFO +2 -2
  4. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools.egg-info/SOURCES.txt +2 -0
  5. {ilovetools-0.2.23 → ilovetools-0.2.24}/pyproject.toml +2 -2
  6. {ilovetools-0.2.23 → ilovetools-0.2.24}/setup.py +2 -2
  7. ilovetools-0.2.24/tests/test_lr_schedulers.py +522 -0
  8. {ilovetools-0.2.23 → ilovetools-0.2.24}/LICENSE +0 -0
  9. {ilovetools-0.2.23 → ilovetools-0.2.24}/MANIFEST.in +0 -0
  10. {ilovetools-0.2.23 → ilovetools-0.2.24}/README.md +0 -0
  11. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/__init__.py +0 -0
  12. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ai/__init__.py +0 -0
  13. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ai/embeddings.py +0 -0
  14. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ai/inference.py +0 -0
  15. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ai/llm_helpers.py +0 -0
  16. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/audio/__init__.py +0 -0
  17. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/automation/__init__.py +0 -0
  18. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/automation/file_organizer.py +0 -0
  19. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/conversion/__init__.py +0 -0
  20. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/conversion/config_converter.py +0 -0
  21. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/conversion/config_converter_fixed_header.py +0 -0
  22. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/data/__init__.py +0 -0
  23. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/data/feature_engineering.py +0 -0
  24. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/data/preprocessing.py +0 -0
  25. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/database/__init__.py +0 -0
  26. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/datetime/__init__.py +0 -0
  27. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/email/__init__.py +0 -0
  28. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/email/template_engine.py +0 -0
  29. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/files/__init__.py +0 -0
  30. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/image/__init__.py +0 -0
  31. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/__init__.py +0 -0
  32. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/activations.py +0 -0
  33. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/anomaly_detection.py +0 -0
  34. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/attention.py +0 -0
  35. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/clustering.py +0 -0
  36. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/cnn.py +0 -0
  37. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/cross_validation.py +0 -0
  38. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/dimensionality.py +0 -0
  39. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/ensemble.py +0 -0
  40. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/feature_selection.py +0 -0
  41. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/gradient_descent.py +0 -0
  42. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/imbalanced.py +0 -0
  43. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/interpretation.py +0 -0
  44. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/loss_functions.py +0 -0
  45. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/metrics.py +0 -0
  46. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/neural_network.py +0 -0
  47. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/normalization.py +0 -0
  48. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/normalization_advanced.py +0 -0
  49. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/optimizers.py +0 -0
  50. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/pipeline.py +0 -0
  51. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/positional_encoding.py +0 -0
  52. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/regularization.py +0 -0
  53. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/rnn.py +0 -0
  54. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/timeseries.py +0 -0
  55. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/ml/tuning.py +0 -0
  56. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/security/__init__.py +0 -0
  57. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/security/password_checker.py +0 -0
  58. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/text/__init__.py +0 -0
  59. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/utils/__init__.py +0 -0
  60. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/utils/cache_system.py +0 -0
  61. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/utils/logger.py +0 -0
  62. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/utils/rate_limiter.py +0 -0
  63. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/utils/retry.py +0 -0
  64. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/validation/__init__.py +0 -0
  65. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/validation/data_validator.py +0 -0
  66. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/web/__init__.py +0 -0
  67. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/web/scraper.py +0 -0
  68. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools/web/url_shortener.py +0 -0
  69. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools.egg-info/dependency_links.txt +0 -0
  70. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools.egg-info/requires.txt +0 -0
  71. {ilovetools-0.2.23 → ilovetools-0.2.24}/ilovetools.egg-info/top_level.txt +0 -0
  72. {ilovetools-0.2.23 → ilovetools-0.2.24}/requirements.txt +0 -0
  73. {ilovetools-0.2.23 → ilovetools-0.2.24}/setup.cfg +0 -0
  74. {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/__init__.py +0 -0
  75. {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_activations.py +0 -0
  76. {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_attention.py +0 -0
  77. {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_cnn.py +0 -0
  78. {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_gradient_descent.py +0 -0
  79. {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_loss_functions.py +0 -0
  80. {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_neural_network.py +0 -0
  81. {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_normalization.py +0 -0
  82. {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_normalization_advanced.py +0 -0
  83. {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_optimizers.py +0 -0
  84. {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_positional_encoding.py +0 -0
  85. {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_pypi_installation.py +0 -0
  86. {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_regularization.py +0 -0
  87. {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/test_rnn.py +0 -0
  88. {ilovetools-0.2.23 → ilovetools-0.2.24}/tests/verify_positional_encoding.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ilovetools
- Version: 0.2.23
+ Version: 0.2.24
  Summary: A comprehensive Python utility library with modular tools for AI/ML, data processing, and daily programming needs
  Home-page: https://github.com/AliMehdi512/ilovetools
  Author: Ali Mehdi
@@ -11,7 +11,7 @@ Project-URL: Repository, https://github.com/AliMehdi512/ilovetools
  Project-URL: Issues, https://github.com/AliMehdi512/ilovetools/issues
  Project-URL: Bug Reports, https://github.com/AliMehdi512/ilovetools/issues
  Project-URL: Source, https://github.com/AliMehdi512/ilovetools
- Keywords: utilities,tools,ai,ml,data-processing,automation,transformers,positional-encoding,attention-mechanism,multi-head-attention,rope,alibi,deep-learning,nlp
+ Keywords: utilities,tools,ai,ml,data-processing,automation,learning-rate-schedulers,optimization,adaptive-learning-rate,cosine-annealing,onecycle,sgdr,deep-learning,nlp
  Classifier: Development Status :: 3 - Alpha
  Classifier: Intended Audience :: Developers
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
ilovetools-0.2.24/ilovetools/ml/lr_schedulers.py (new file)
@@ -0,0 +1,697 @@
+ """
+ Learning Rate Schedulers and Advanced Optimization Techniques Module
+
+ This module provides comprehensive implementations of learning rate scheduling
+ strategies and advanced optimization techniques for training deep learning models.
+
+ Features:
+ - Step Decay Scheduler
+ - Exponential Decay Scheduler
+ - Cosine Annealing Scheduler
+ - Cosine Annealing with Warm Restarts (SGDR)
+ - One Cycle Policy (Super-Convergence)
+ - Reduce on Plateau Scheduler
+ - Polynomial Decay Scheduler
+ - Linear Warmup Scheduler
+ - Cyclical Learning Rate
+ - Learning Rate Finder
+
+ Author: Ali Mehdi
+ License: MIT
+ """
+
+ import numpy as np
+ from typing import Optional, Callable, List, Tuple
+
+
+ # ============================================================================
+ # LEARNING RATE SCHEDULERS
+ # ============================================================================
+
+ class StepLRScheduler:
+     """
+     Step Learning Rate Scheduler
+
+     Decays the learning rate by gamma every step_size epochs.
+     Commonly used in ResNet, VGG, and other classic architectures.
+
+     Args:
+         initial_lr: Initial learning rate
+         step_size: Period of learning rate decay (in epochs)
+         gamma: Multiplicative factor of learning rate decay
+     """
+
+     def __init__(self, initial_lr: float, step_size: int, gamma: float = 0.1):
+         self.initial_lr = initial_lr
+         self.step_size = step_size
+         self.gamma = gamma
+         self.current_epoch = 0
+         self.current_lr = initial_lr
+
+     def step(self, epoch: Optional[int] = None) -> float:
+         """
+         Update learning rate for the next epoch
+
+         Args:
+             epoch: Current epoch number (optional)
+
+         Returns:
+             Updated learning rate
+         """
+         if epoch is not None:
+             self.current_epoch = epoch
+         else:
+             self.current_epoch += 1
+
+         self.current_lr = self.initial_lr * (self.gamma ** (self.current_epoch // self.step_size))
+         return self.current_lr
+
+     def get_lr(self) -> float:
+         """Get current learning rate"""
+         return self.current_lr
+
+
+ class ExponentialLRScheduler:
+     """
+     Exponential Learning Rate Scheduler
+
+     Decays the learning rate exponentially: lr = lr_0 * gamma^epoch
+     Provides smooth, continuous decay.
+
+     Args:
+         initial_lr: Initial learning rate
+         gamma: Multiplicative factor of learning rate decay (typically 0.95-0.99)
+     """
+
+     def __init__(self, initial_lr: float, gamma: float = 0.95):
+         self.initial_lr = initial_lr
+         self.gamma = gamma
+         self.current_epoch = 0
+         self.current_lr = initial_lr
+
+     def step(self, epoch: Optional[int] = None) -> float:
+         """Update learning rate"""
+         if epoch is not None:
+             self.current_epoch = epoch
+         else:
+             self.current_epoch += 1
+
+         self.current_lr = self.initial_lr * (self.gamma ** self.current_epoch)
+         return self.current_lr
+
+     def get_lr(self) -> float:
+         """Get current learning rate"""
+         return self.current_lr
+
+
+ class CosineAnnealingLR:
+     """
+     Cosine Annealing Learning Rate Scheduler
+
+     Sets the learning rate using a cosine annealing schedule:
+     lr = lr_min + 0.5 * (lr_max - lr_min) * (1 + cos(pi * epoch / T_max))
+
+     Used in modern transformers and state-of-the-art models.
+
+     Args:
+         initial_lr: Maximum learning rate
+         T_max: Maximum number of iterations
+         eta_min: Minimum learning rate (default: 0)
+     """
+
+     def __init__(self, initial_lr: float, T_max: int, eta_min: float = 0):
+         self.initial_lr = initial_lr
+         self.T_max = T_max
+         self.eta_min = eta_min
+         self.current_epoch = 0
+         self.current_lr = initial_lr
+
+     def step(self, epoch: Optional[int] = None) -> float:
+         """Update learning rate"""
+         if epoch is not None:
+             self.current_epoch = epoch
+         else:
+             self.current_epoch += 1
+
+         self.current_lr = self.eta_min + (self.initial_lr - self.eta_min) * \
+             (1 + np.cos(np.pi * self.current_epoch / self.T_max)) / 2
+         return self.current_lr
+
+     def get_lr(self) -> float:
+         """Get current learning rate"""
+         return self.current_lr
+
+
+ class CosineAnnealingWarmRestarts:
+     """
+     Cosine Annealing with Warm Restarts (SGDR)
+
+     Implements SGDR: Stochastic Gradient Descent with Warm Restarts.
+     Periodically resets the learning rate to help escape local minima.
+
+     Args:
+         initial_lr: Maximum learning rate
+         T_0: Number of iterations for the first restart
+         T_mult: Factor to increase T_i after each restart (default: 1)
+         eta_min: Minimum learning rate (default: 0)
+     """
+
+     def __init__(self, initial_lr: float, T_0: int, T_mult: int = 1, eta_min: float = 0):
+         self.initial_lr = initial_lr
+         self.T_0 = T_0
+         self.T_mult = T_mult
+         self.eta_min = eta_min
+         self.current_epoch = 0
+         self.T_cur = 0
+         self.T_i = T_0
+         self.current_lr = initial_lr
+
+     def step(self, epoch: Optional[int] = None) -> float:
+         """Update learning rate"""
+         if epoch is None:
+             epoch = self.current_epoch + 1
+
+         self.current_epoch = epoch
+         self.T_cur = epoch % self.T_i
+
+         # Check if we need to restart
+         if self.T_cur == 0 and epoch > 0:
+             self.T_i = self.T_i * self.T_mult
+
+         self.current_lr = self.eta_min + (self.initial_lr - self.eta_min) * \
+             (1 + np.cos(np.pi * self.T_cur / self.T_i)) / 2
+         return self.current_lr
+
+     def get_lr(self) -> float:
+         """Get current learning rate"""
+         return self.current_lr
+
+
+ class OneCycleLR:
+     """
+     One Cycle Learning Rate Policy
+
+     Implements the One Cycle Policy for super-convergence.
+     Single cycle: warmup -> peak -> decay
+
+     Can significantly reduce training time while improving performance.
+
+     Args:
+         max_lr: Maximum learning rate
+         total_steps: Total number of training steps
+         pct_start: Percentage of cycle spent increasing LR (default: 0.3)
+         anneal_strategy: 'cos' or 'linear' (default: 'cos')
+         div_factor: Initial LR = max_lr / div_factor (default: 25)
+         final_div_factor: Final LR = max_lr / final_div_factor (default: 10000)
+     """
+
+     def __init__(
+         self,
+         max_lr: float,
+         total_steps: int,
+         pct_start: float = 0.3,
+         anneal_strategy: str = 'cos',
+         div_factor: float = 25.0,
+         final_div_factor: float = 10000.0
+     ):
+         self.max_lr = max_lr
+         self.total_steps = total_steps
+         self.pct_start = pct_start
+         self.anneal_strategy = anneal_strategy
+         self.initial_lr = max_lr / div_factor
+         self.final_lr = max_lr / final_div_factor
+
+         self.step_size_up = int(total_steps * pct_start)
+         self.step_size_down = total_steps - self.step_size_up
+
+         self.current_step = 0
+         self.current_lr = self.initial_lr
+
+     def step(self) -> float:
+         """Update learning rate for next step"""
+         self.current_step += 1
+
+         if self.current_step <= self.step_size_up:
+             # Warmup phase
+             pct = self.current_step / self.step_size_up
+             self.current_lr = self.initial_lr + (self.max_lr - self.initial_lr) * pct
+         else:
+             # Annealing phase
+             pct = (self.current_step - self.step_size_up) / self.step_size_down
+
+             if self.anneal_strategy == 'cos':
+                 self.current_lr = self.final_lr + (self.max_lr - self.final_lr) * \
+                     (1 + np.cos(np.pi * pct)) / 2
+             else:  # linear
+                 self.current_lr = self.max_lr - (self.max_lr - self.final_lr) * pct
+
+         return self.current_lr
+
+     def get_lr(self) -> float:
+         """Get current learning rate"""
+         return self.current_lr
+
+
+ class ReduceLROnPlateau:
+     """
+     Reduce Learning Rate on Plateau
+
+     Reduces learning rate when a metric has stopped improving.
+     Adaptive scheduler based on validation performance.
+
+     Args:
+         initial_lr: Initial learning rate
+         mode: 'min' or 'max' (default: 'min')
+         factor: Factor by which LR will be reduced (default: 0.1)
+         patience: Number of epochs with no improvement (default: 10)
+         threshold: Threshold for measuring improvement (default: 1e-4)
+         min_lr: Minimum learning rate (default: 0)
+     """
+
+     def __init__(
+         self,
+         initial_lr: float,
+         mode: str = 'min',
+         factor: float = 0.1,
+         patience: int = 10,
+         threshold: float = 1e-4,
+         min_lr: float = 0
+     ):
+         self.initial_lr = initial_lr
+         self.mode = mode
+         self.factor = factor
+         self.patience = patience
+         self.threshold = threshold
+         self.min_lr = min_lr
+
+         self.current_lr = initial_lr
+         self.best_value = np.inf if mode == 'min' else -np.inf
+         self.num_bad_epochs = 0
+         self.cooldown_counter = 0
+
+     def step(self, metric: float) -> float:
+         """
+         Update learning rate based on metric
+
+         Args:
+             metric: Current metric value (e.g., validation loss)
+
+         Returns:
+             Updated learning rate
+         """
+         if self.cooldown_counter > 0:
+             self.cooldown_counter -= 1
+             return self.current_lr
+
+         # Check if metric improved
+         if self.mode == 'min':
+             improved = metric < self.best_value - self.threshold
+         else:
+             improved = metric > self.best_value + self.threshold
+
+         if improved:
+             self.best_value = metric
+             self.num_bad_epochs = 0
+         else:
+             self.num_bad_epochs += 1
+
+         # Reduce LR if no improvement for patience epochs
+         if self.num_bad_epochs >= self.patience:
+             self.current_lr = max(self.current_lr * self.factor, self.min_lr)
+             self.num_bad_epochs = 0
+             self.cooldown_counter = self.patience
+
+         return self.current_lr
+
+     def get_lr(self) -> float:
+         """Get current learning rate"""
+         return self.current_lr
+
+
+ class PolynomialLRScheduler:
+     """
+     Polynomial Learning Rate Decay
+
+     Decays learning rate using polynomial function.
+     Used in BERT and other transformer models.
+
+     Args:
+         initial_lr: Initial learning rate
+         total_steps: Total number of training steps
+         power: Polynomial power (default: 1.0 for linear)
+         end_lr: Minimum learning rate (default: 0)
+     """
+
+     def __init__(
+         self,
+         initial_lr: float,
+         total_steps: int,
+         power: float = 1.0,
+         end_lr: float = 0
+     ):
+         self.initial_lr = initial_lr
+         self.total_steps = total_steps
+         self.power = power
+         self.end_lr = end_lr
+         self.current_step = 0
+         self.current_lr = initial_lr
+
+     def step(self) -> float:
+         """Update learning rate"""
+         self.current_step += 1
+
+         if self.current_step >= self.total_steps:
+             self.current_lr = self.end_lr
+         else:
+             decay_factor = (1 - self.current_step / self.total_steps) ** self.power
+             self.current_lr = (self.initial_lr - self.end_lr) * decay_factor + self.end_lr
+
+         return self.current_lr
+
+     def get_lr(self) -> float:
+         """Get current learning rate"""
+         return self.current_lr
+
+
+ class LinearWarmupScheduler:
+     """
+     Linear Warmup Scheduler
+
+     Linearly increases learning rate from 0 to target over warmup steps.
+     Often combined with other schedulers for stable training start.
+
+     Args:
+         target_lr: Target learning rate after warmup
+         warmup_steps: Number of warmup steps
+     """
+
+     def __init__(self, target_lr: float, warmup_steps: int):
+         self.target_lr = target_lr
+         self.warmup_steps = warmup_steps
+         self.current_step = 0
+         self.current_lr = 0
+
+     def step(self) -> float:
+         """Update learning rate"""
+         self.current_step += 1
+
+         if self.current_step >= self.warmup_steps:
+             self.current_lr = self.target_lr
+         else:
+             self.current_lr = self.target_lr * (self.current_step / self.warmup_steps)
+
+         return self.current_lr
+
+     def get_lr(self) -> float:
+         """Get current learning rate"""
+         return self.current_lr
+
+
+ class CyclicalLR:
+     """
+     Cyclical Learning Rate
+
+     Cycles learning rate between base_lr and max_lr.
+     Helps explore loss landscape and escape local minima.
+
+     Args:
+         base_lr: Minimum learning rate
+         max_lr: Maximum learning rate
+         step_size: Half cycle length (in steps)
+         mode: 'triangular', 'triangular2', or 'exp_range'
+         gamma: Decay constant for exp_range mode (default: 1.0)
+     """
+
+     def __init__(
+         self,
+         base_lr: float,
+         max_lr: float,
+         step_size: int,
+         mode: str = 'triangular',
+         gamma: float = 1.0
+     ):
+         self.base_lr = base_lr
+         self.max_lr = max_lr
+         self.step_size = step_size
+         self.mode = mode
+         self.gamma = gamma
+
+         self.current_step = 0
+         self.cycle = 0
+         self.current_lr = base_lr
+
+     def step(self) -> float:
+         """Update learning rate"""
+         self.current_step += 1
+         self.cycle = np.floor(1 + self.current_step / (2 * self.step_size))
+         x = np.abs(self.current_step / self.step_size - 2 * self.cycle + 1)
+
+         if self.mode == 'triangular':
+             scale_factor = 1.0
+         elif self.mode == 'triangular2':
+             scale_factor = 1 / (2 ** (self.cycle - 1))
+         else:  # exp_range
+             scale_factor = self.gamma ** self.current_step
+
+         self.current_lr = self.base_lr + (self.max_lr - self.base_lr) * \
+             max(0, (1 - x)) * scale_factor
+
+         return self.current_lr
+
+     def get_lr(self) -> float:
+         """Get current learning rate"""
+         return self.current_lr
+
+
+ class LRFinder:
+     """
+     Learning Rate Finder
+
+     Finds optimal learning rate by gradually increasing LR and
+     monitoring loss. Helps determine good initial learning rate.
+
+     Based on Leslie Smith's LR range test.
+
+     Args:
+         start_lr: Starting learning rate (default: 1e-7)
+         end_lr: Ending learning rate (default: 10)
+         num_steps: Number of steps for the test (default: 100)
+     """
+
+     def __init__(
+         self,
+         start_lr: float = 1e-7,
+         end_lr: float = 10,
+         num_steps: int = 100
+     ):
+         self.start_lr = start_lr
+         self.end_lr = end_lr
+         self.num_steps = num_steps
+
+         self.current_step = 0
+         self.current_lr = start_lr
+         self.lr_history = []
+         self.loss_history = []
+
+         # Calculate multiplicative factor
+         self.mult_factor = (end_lr / start_lr) ** (1 / num_steps)
+
+     def step(self, loss: float) -> float:
+         """
+         Update learning rate and record loss
+
+         Args:
+             loss: Current training loss
+
+         Returns:
+             Updated learning rate
+         """
+         self.lr_history.append(self.current_lr)
+         self.loss_history.append(loss)
+
+         self.current_step += 1
+         self.current_lr = self.start_lr * (self.mult_factor ** self.current_step)
+
+         return self.current_lr
+
+     def get_lr(self) -> float:
+         """Get current learning rate"""
+         return self.current_lr
+
+     def plot_results(self) -> Tuple[List[float], List[float]]:
+         """
+         Get LR and loss history for plotting
+
+         Returns:
+             Tuple of (lr_history, loss_history)
+         """
+         return self.lr_history, self.loss_history
+
+     def suggest_lr(self) -> float:
+         """
+         Suggest optimal learning rate based on loss curve
+
+         Returns:
+             Suggested learning rate
+         """
+         if len(self.loss_history) < 2:
+             return self.start_lr
+
+         # Find LR with steepest negative gradient
+         losses = np.array(self.loss_history)
+         lrs = np.array(self.lr_history)
+
+         # Smooth losses
+         window = min(5, len(losses) // 10)
+         if window > 1:
+             losses = np.convolve(losses, np.ones(window)/window, mode='valid')
+             lrs = lrs[:len(losses)]
+
+         # Find steepest descent
+         gradients = np.gradient(losses)
+         min_gradient_idx = np.argmin(gradients)
+
+         # Suggest LR at steepest descent
+         suggested_lr = lrs[min_gradient_idx]
+
+         return suggested_lr
+
+
+ # ============================================================================
+ # COMBINED SCHEDULERS
+ # ============================================================================
+
+ class WarmupCosineScheduler:
+     """
+     Warmup + Cosine Annealing Scheduler
+
+     Combines linear warmup with cosine annealing.
+     Common in transformer training (BERT, GPT, etc.)
+
+     Args:
+         max_lr: Maximum learning rate
+         warmup_steps: Number of warmup steps
+         total_steps: Total number of training steps
+         min_lr: Minimum learning rate (default: 0)
+     """
+
+     def __init__(
+         self,
+         max_lr: float,
+         warmup_steps: int,
+         total_steps: int,
+         min_lr: float = 0
+     ):
+         self.max_lr = max_lr
+         self.warmup_steps = warmup_steps
+         self.total_steps = total_steps
+         self.min_lr = min_lr
+
+         self.current_step = 0
+         self.current_lr = 0
+
+     def step(self) -> float:
+         """Update learning rate"""
+         self.current_step += 1
+
+         if self.current_step <= self.warmup_steps:
+             # Warmup phase
+             self.current_lr = self.max_lr * (self.current_step / self.warmup_steps)
+         else:
+             # Cosine annealing phase
+             progress = (self.current_step - self.warmup_steps) / \
+                 (self.total_steps - self.warmup_steps)
+             self.current_lr = self.min_lr + (self.max_lr - self.min_lr) * \
+                 (1 + np.cos(np.pi * progress)) / 2
+
+         return self.current_lr
+
+     def get_lr(self) -> float:
+         """Get current learning rate"""
+         return self.current_lr
+
+
+ # ============================================================================
+ # UTILITY FUNCTIONS
+ # ============================================================================
+
+ def get_scheduler(
+     scheduler_name: str,
+     initial_lr: float,
+     **kwargs
+ ) -> object:
+     """
+     Factory function to create scheduler by name
+
+     Args:
+         scheduler_name: Name of the scheduler
+         initial_lr: Initial learning rate
+         **kwargs: Additional scheduler-specific arguments
+
+     Returns:
+         Scheduler instance
+     """
+     schedulers = {
+         'step': StepLRScheduler,
+         'exponential': ExponentialLRScheduler,
+         'cosine': CosineAnnealingLR,
+         'cosine_restarts': CosineAnnealingWarmRestarts,
+         'onecycle': OneCycleLR,
+         'plateau': ReduceLROnPlateau,
+         'polynomial': PolynomialLRScheduler,
+         'warmup': LinearWarmupScheduler,
+         'cyclical': CyclicalLR,
+         'warmup_cosine': WarmupCosineScheduler,
+     }
+
+     if scheduler_name not in schedulers:
+         raise ValueError(f"Unknown scheduler: {scheduler_name}")
+
+     return schedulers[scheduler_name](initial_lr, **kwargs)
+
+
+ # ============================================================================
+ # ALIASES FOR CONVENIENCE
+ # ============================================================================
+
+ step_lr = StepLRScheduler
+ exp_lr = ExponentialLRScheduler
+ cosine_lr = CosineAnnealingLR
+ sgdr = CosineAnnealingWarmRestarts
+ onecycle = OneCycleLR
+ plateau_lr = ReduceLROnPlateau
+ poly_lr = PolynomialLRScheduler
+ warmup_lr = LinearWarmupScheduler
+ cyclical_lr = CyclicalLR
+ lr_finder = LRFinder
+ warmup_cosine = WarmupCosineScheduler
+
+
+ __all__ = [
+     # Scheduler Classes
+     'StepLRScheduler',
+     'ExponentialLRScheduler',
+     'CosineAnnealingLR',
+     'CosineAnnealingWarmRestarts',
+     'OneCycleLR',
+     'ReduceLROnPlateau',
+     'PolynomialLRScheduler',
+     'LinearWarmupScheduler',
+     'CyclicalLR',
+     'LRFinder',
+     'WarmupCosineScheduler',
+     # Utility Functions
+     'get_scheduler',
+     # Aliases
+     'step_lr',
+     'exp_lr',
+     'cosine_lr',
+     'sgdr',
+     'onecycle',
+     'plateau_lr',
+     'poly_lr',
+     'warmup_lr',
+     'cyclical_lr',
+     'lr_finder',
+     'warmup_cosine',
+ ]
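
For context, a minimal usage sketch of the scheduler API added in this release (illustrative only, not part of the diff; the optimizer hookup and the randomly generated validation metric are placeholders, and the import path assumes the installed package layout):

import numpy as np
from ilovetools.ml.lr_schedulers import WarmupCosineScheduler, get_scheduler

# Warmup + cosine annealing over a hypothetical 1000-step run
scheduler = WarmupCosineScheduler(max_lr=3e-4, warmup_steps=100, total_steps=1000)
for step in range(1000):
    lr = scheduler.step()            # LR to apply for this training step
    # optimizer.learning_rate = lr   # placeholder: wire lr into your optimizer here

# Same module via the factory; extra keyword arguments are scheduler-specific
plateau = get_scheduler('plateau', initial_lr=1e-3, factor=0.5, patience=3)
for epoch in range(20):
    val_loss = float(np.random.rand())   # placeholder validation metric
    lr = plateau.step(val_loss)          # halves the LR after 3 epochs without improvement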