adv-optm 0.1.9.tar.gz → 1.0.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of adv-optm might be problematic.
- {adv_optm-0.1.9 → adv_optm-1.0.0}/PKG-INFO +1 -1
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/__init__.py +1 -1
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/optim/Prodigy_adv.py +3 -6
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm.egg-info/PKG-INFO +1 -1
- {adv_optm-0.1.9 → adv_optm-1.0.0}/setup.py +1 -1
- {adv_optm-0.1.9 → adv_optm-1.0.0}/LICENSE +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/README.md +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/optim/AdamW_adv.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/optim/Adopt_adv.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/optim/Lion_Prodigy_adv.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/optim/Lion_adv.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/optim/Simplified_AdEMAMix.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/optim/__init__.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/util/BF16_Stochastic_Rounding.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/util/Effective_Shape.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/util/NNMF.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/util/One_Bit_Boolean.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/util/OrthoGrad.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/util/__init__.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm.egg-info/SOURCES.txt +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm.egg-info/dependency_links.txt +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm.egg-info/requires.txt +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm.egg-info/top_level.txt +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/setup.cfg +0 -0
adv_optm/optim/Prodigy_adv.py

```diff
@@ -141,6 +141,9 @@ class Prodigy_adv(torch.optim.Optimizer):
         if use_atan2 and Simplified_AdEMAMix:
             print("Warning: use_atan2 is incompatible with Simplified_AdEMAMix. Disabling use_atan2.")
             use_atan2 = False
+        if Simplified_AdEMAMix and alpha_grad > 0:
+            # scales d_coef by alpha_grad, this force prodigy to behave well with Simplified_AdEMAMix
+            d_coef = d_coef/alpha_grad
 
         defaults = {
             "lr": lr, "betas": betas, "eps": eps, "weight_decay": weight_decay,
```
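The hunk above moves the alpha_grad compensation from step time to construction time: rather than dividing the accumulated numerator on every d update (removed in the second hunk below), d_coef is divided by alpha_grad once in __init__. A minimal sketch of why the two placements yield the same d_hat, using illustrative names rather than the package's internal state:

```python
# Illustrative only -- not the package's code. Shows that dividing d_coef once
# up front is algebraically the same as dividing the accumulated numerator by
# alpha_grad at d-update time (for alpha_grad > 0).

def d_hat_old(d_coef, alpha_grad, d_numerator, d_denom):
    # 0.1.9 behaviour: the numerator, inflated by (alpha_grad * grad) terms,
    # was normalized inside the step just before computing d_hat.
    return d_coef * (d_numerator / alpha_grad) / d_denom

def d_hat_new(d_coef, alpha_grad, d_numerator, d_denom):
    # 1.0.0 behaviour: d_coef is pre-scaled in __init__, so the step-time
    # formula stays the plain Prodigy expression d_coef * num / denom.
    return (d_coef / alpha_grad) * d_numerator / d_denom

assert abs(d_hat_old(2.0, 5.0, 3.0, 7.0) - d_hat_new(2.0, 5.0, 3.0, 7.0)) < 1e-12
```

A side benefit of the construction-time placement is that the step loop no longer carries a Simplified_AdEMAMix special case, which appears to be why the runtime branch below was dropped.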
```diff
@@ -456,12 +459,6 @@ class Prodigy_adv(torch.optim.Optimizer):
 
         d_hat = self.d
         if global_d_denom > 0:
-            if self.Simplified_AdEMAMix and g_group['alpha_grad'] > 0:
-                # A simple and effective hack to make prodigy compatible with Simplified_AdEMAMix large step sizes
-                # by diving by alpha_grad we make sure that d_numerator that was influenced by (alpha_grad * grad)
-                # are now normalized by /alpha_grad. this is a heuristic way since the update is also influenced by
-                # the increasing and decaying accumulator but it's effective and it worked for me (for Lora/Finetune).
-                global_d_numerator /= g_group['alpha_grad']
             d_hat = d_coef * global_d_numerator / global_d_denom
             if self.d == g_group['d0']:
                 self.d = max(self.d, d_hat)
```
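For callers, the visible effect of 1.0.0 is that combining Simplified_AdEMAMix with alpha_grad > 0 no longer relies on a step-time hack; the rescaling happens once when the optimizer is built. A hedged usage sketch follows; the import path is inferred from the file list above and the keyword names from the __init__ hunk, so treat the exact signature as an assumption rather than documented API:

```python
# Hedged usage sketch -- not taken from the package docs. The import path and
# keyword names below are assumptions based on the diff; verify against the
# package README before relying on them.
import torch
from adv_optm.optim.Prodigy_adv import Prodigy_adv  # module path per the file list

model = torch.nn.Linear(16, 4)

opt = Prodigy_adv(
    model.parameters(),
    lr=1.0,                    # Prodigy-style optimizers are typically run with lr near 1.0
    Simplified_AdEMAMix=True,  # assumed keyword, mirrors the flag seen in the hunks
    alpha_grad=5.0,            # assumed keyword; the value here is purely illustrative
)

# Standard PyTorch training step; in 1.0.0 no manual adjustment of d_coef is
# needed when alpha_grad > 0, since __init__ divides d_coef by alpha_grad itself.
loss = model(torch.randn(8, 16)).pow(2).mean()
loss.backward()
opt.step()
opt.zero_grad()
```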