adv-optm 1.1.2__tar.gz → 1.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of adv-optm might be problematic.
- {adv_optm-1.1.2 → adv_optm-1.1.3}/PKG-INFO +1 -1
- {adv_optm-1.1.2 → adv_optm-1.1.3}/adv_optm/__init__.py +1 -1
- {adv_optm-1.1.2 → adv_optm-1.1.3}/adv_optm/optim/AdamW_adv.py +2 -2
- {adv_optm-1.1.2 → adv_optm-1.1.3}/adv_optm/optim/Adopt_adv.py +1 -1
- {adv_optm-1.1.2 → adv_optm-1.1.3}/adv_optm/optim/Lion_Prodigy_adv.py +1 -1
- {adv_optm-1.1.2 → adv_optm-1.1.3}/adv_optm/optim/Prodigy_adv.py +2 -2
- {adv_optm-1.1.2 → adv_optm-1.1.3}/adv_optm/optim/Simplified_AdEMAMix.py +2 -2
- {adv_optm-1.1.2 → adv_optm-1.1.3}/adv_optm.egg-info/PKG-INFO +1 -1
- {adv_optm-1.1.2 → adv_optm-1.1.3}/setup.py +1 -1
- {adv_optm-1.1.2 → adv_optm-1.1.3}/LICENSE +0 -0
- {adv_optm-1.1.2 → adv_optm-1.1.3}/README.md +0 -0
- {adv_optm-1.1.2 → adv_optm-1.1.3}/adv_optm/optim/Lion_adv.py +0 -0
- {adv_optm-1.1.2 → adv_optm-1.1.3}/adv_optm/optim/__init__.py +0 -0
- {adv_optm-1.1.2 → adv_optm-1.1.3}/adv_optm/util/BF16_Stochastic_Rounding.py +0 -0
- {adv_optm-1.1.2 → adv_optm-1.1.3}/adv_optm/util/Effective_Shape.py +0 -0
- {adv_optm-1.1.2 → adv_optm-1.1.3}/adv_optm/util/Kourkoutas.py +0 -0
- {adv_optm-1.1.2 → adv_optm-1.1.3}/adv_optm/util/NNMF.py +0 -0
- {adv_optm-1.1.2 → adv_optm-1.1.3}/adv_optm/util/One_Bit_Boolean.py +0 -0
- {adv_optm-1.1.2 → adv_optm-1.1.3}/adv_optm/util/OrthoGrad.py +0 -0
- {adv_optm-1.1.2 → adv_optm-1.1.3}/adv_optm/util/__init__.py +0 -0
- {adv_optm-1.1.2 → adv_optm-1.1.3}/adv_optm.egg-info/SOURCES.txt +0 -0
- {adv_optm-1.1.2 → adv_optm-1.1.3}/adv_optm.egg-info/dependency_links.txt +0 -0
- {adv_optm-1.1.2 → adv_optm-1.1.3}/adv_optm.egg-info/requires.txt +0 -0
- {adv_optm-1.1.2 → adv_optm-1.1.3}/adv_optm.egg-info/top_level.txt +0 -0
- {adv_optm-1.1.2 → adv_optm-1.1.3}/setup.cfg +0 -0
@@ -209,7 +209,7 @@ class AdamW_adv(torch.optim.Optimizer):
 beta1, beta2 = group['betas']
 
 current_step = state['step']
-if group['kourkoutas_beta']:
+if group.get('kourkoutas_beta', False):
     # Call prepare_step() once at the beginning of the step for all params
     self.kourkoutas_helper.maybe_prepare_step(current_step)
     # Accumulate current grad's norm for the *next* step

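Every hunk in this release follows the same pattern: a per-group flag ('kourkoutas_beta', and later 'd_limiter') is now read with dict.get() and a False default instead of direct indexing, as the truncated removed lines ("if group ...") suggest. A minimal sketch of the behavioral difference, assuming a hypothetical param group that was built without the flag (for example one carried over from an older configuration):

    # Hypothetical param group that lacks the new flag.
    group = {'lr': 1e-3, 'betas': (0.9, 0.999)}

    # 1.1.2-style lookup: raises KeyError when the key is missing.
    # enabled = group['kourkoutas_beta']

    # 1.1.3-style lookup: falls back to False, i.e. the feature stays off.
    enabled = group.get('kourkoutas_beta', False)
    print(enabled)  # False
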
@@ -220,7 +220,7 @@ class AdamW_adv(torch.optim.Optimizer):
 step = state['step'] + 1
 if group['use_bias_correction']:
     bias_correction1 = 1.0 - beta1 ** step
-    if group['kourkoutas_beta']:
+    if group.get('kourkoutas_beta', False):
         bias_correction2 = 1.0 - group['betas'][1] ** step
         # Use beta2_max for bias correction
     else:

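Only the conditional changes here; the correction term itself still uses group['betas'][1], i.e. the configured beta2 upper bound, rather than the per-step beta2 chosen by the Kourkoutas helper. A small numeric sketch of the two correction factors, with values chosen purely for illustration (not taken from the package defaults):

    step = 10
    beta1, beta2_max = 0.9, 0.999
    bias_correction1 = 1.0 - beta1 ** step      # ~0.6513
    bias_correction2 = 1.0 - beta2_max ** step  # ~0.00996
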
@@ -240,7 +240,7 @@ class Adopt_adv(torch.optim.Optimizer):
 beta1, beta2 = group['betas']
 
 current_step = state['step']
-if group['kourkoutas_beta']:
+if group.get('kourkoutas_beta', False):
     # Call prepare_step() once at the beginning of the step for all params
     self.kourkoutas_helper.maybe_prepare_step(current_step)
     # Accumulate current grad's norm for the *next* step

@@ -325,7 +325,7 @@ class Lion_Prodigy_adv(torch.optim.Optimizer):
 d_hat = self.d
 if global_d_denom > 0:
     d_hat = d_coef * global_d_numerator / global_d_denom
-    if g_group['d_limiter']:
+    if g_group.get('d_limiter', False):
         d_hat = min(self.d * (2 ** 0.25), d_hat)
     if self.d == g_group['d0']:
         self.d = max(self.d, d_hat)

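As before, only the flag lookup changes; the limiter itself still caps the proposed d_hat at 2 ** 0.25 (about 1.19) times the current d, so the Prodigy step-size estimate can grow by at most roughly 19% per update when the limiter is enabled. A small sketch with made-up numbers (d, d_coef, numerator and denominator are illustrative, not from a real run):

    d = 1e-4
    d_coef, global_d_numerator, global_d_denom = 1.0, 5e-4, 1.0
    d_hat = d_coef * global_d_numerator / global_d_denom  # raw proposal: 5e-4, a 5x jump
    d_hat = min(d * (2 ** 0.25), d_hat)                   # clipped to ~1.19e-4
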
@@ -304,7 +304,7 @@ class Prodigy_adv(torch.optim.Optimizer):
 state['p0'] = torch.tensor(0, device=device, dtype=p.dtype)
 
 current_step = state['step']
-if group['kourkoutas_beta']:
+if group.get('kourkoutas_beta', False):
     # Call prepare_step() once at the beginning of the step for all params
     self.kourkoutas_helper.maybe_prepare_step(current_step)
     # Accumulate current grad's norm for the *next* step

@@ -515,7 +515,7 @@ class Prodigy_adv(torch.optim.Optimizer):
 d_hat = self.d
 if global_d_denom > 0:
     d_hat = d_coef * global_d_numerator / global_d_denom
-    if g_group['d_limiter']:
+    if g_group.get('d_limiter', False):
         d_hat = min(self.d * (2 ** 0.25), d_hat)
     if self.d == g_group['d0']:
         self.d = max(self.d, d_hat)

@@ -191,7 +191,7 @@ class Simplified_AdEMAMix(torch.optim.Optimizer):
 beta1_final, beta2 = group["betas"]
 
 current_step = state['step']
-if group['kourkoutas_beta']:
+if group.get('kourkoutas_beta', False):
     # Call prepare_step() once at the beginning of the step for all params
     self.kourkoutas_helper.maybe_prepare_step(current_step)
     # Accumulate current grad's norm for the *next* step

@@ -210,7 +210,7 @@ class Simplified_AdEMAMix(torch.optim.Optimizer):
 
 if group['use_bias_correction']:
     state['num_sum'] = beta1 * state['num_sum'] + 1.0
-    if group['kourkoutas_beta']:
+    if group.get('kourkoutas_beta', False):
         state['den_sum'] = group['betas'][1] * state['den_sum'] + (1.0 - group['betas'][1])
     else:
         state['den_sum'] = beta2 * state['den_sum'] + (1.0 - beta2)

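Here too only the guard changes: with the Kourkoutas flag set, the den_sum recursion keeps using group['betas'][1] (the beta2 upper bound), while the else branch uses the current beta2. With a constant beta2 and den_sum starting at 0, this recursion reproduces the familiar 1 - beta2 ** t bias-correction denominator; a minimal check of that identity, with an illustrative beta2 and step count:

    beta2, t = 0.999, 50
    den_sum = 0.0
    for _ in range(t):
        den_sum = beta2 * den_sum + (1.0 - beta2)
    assert abs(den_sum - (1.0 - beta2 ** t)) < 1e-12
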
The remaining files listed above (+0 -0) are unchanged between 1.1.2 and 1.1.3.