adv-optm 0.1.9.tar.gz → 1.0.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of adv-optm might be problematic.
- {adv_optm-0.1.9 → adv_optm-1.0.0}/PKG-INFO +1 -1
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/__init__.py +1 -1
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/optim/Prodigy_adv.py +3 -6
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm.egg-info/PKG-INFO +1 -1
- {adv_optm-0.1.9 → adv_optm-1.0.0}/setup.py +1 -1
- {adv_optm-0.1.9 → adv_optm-1.0.0}/LICENSE +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/README.md +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/optim/AdamW_adv.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/optim/Adopt_adv.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/optim/Lion_Prodigy_adv.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/optim/Lion_adv.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/optim/Simplified_AdEMAMix.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/optim/__init__.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/util/BF16_Stochastic_Rounding.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/util/Effective_Shape.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/util/NNMF.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/util/One_Bit_Boolean.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/util/OrthoGrad.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm/util/__init__.py +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm.egg-info/SOURCES.txt +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm.egg-info/dependency_links.txt +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm.egg-info/requires.txt +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/adv_optm.egg-info/top_level.txt +0 -0
- {adv_optm-0.1.9 → adv_optm-1.0.0}/setup.cfg +0 -0
adv_optm/optim/Prodigy_adv.py

```diff
@@ -141,6 +141,9 @@ class Prodigy_adv(torch.optim.Optimizer):
         if use_atan2 and Simplified_AdEMAMix:
             print("Warning: use_atan2 is incompatible with Simplified_AdEMAMix. Disabling use_atan2.")
             use_atan2 = False
+        if Simplified_AdEMAMix and alpha_grad > 0:
+            # scales d_coef by alpha_grad, this force prodigy to behave well with Simplified_AdEMAMix
+            d_coef = d_coef/alpha_grad
 
         defaults = {
             "lr": lr, "betas": betas, "eps": eps, "weight_decay": weight_decay,
```
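The hunk above moves the alpha_grad compensation from step time to construction time: rather than dividing the accumulated numerator on every d update (removed in the second hunk below), d_coef is divided by alpha_grad once in __init__. A minimal sketch of why the two placements yield the same d_hat, using illustrative names rather than the package's internal state:

```python
# Illustrative only -- not the package's code. Shows that dividing d_coef once
# up front is algebraically the same as dividing the accumulated numerator by
# alpha_grad at d-update time (for alpha_grad > 0).

def d_hat_old(d_coef, alpha_grad, d_numerator, d_denom):
    # 0.1.9 behaviour: the numerator, inflated by (alpha_grad * grad) terms,
    # was normalized inside the step just before computing d_hat.
    return d_coef * (d_numerator / alpha_grad) / d_denom

def d_hat_new(d_coef, alpha_grad, d_numerator, d_denom):
    # 1.0.0 behaviour: d_coef is pre-scaled in __init__, so the step-time
    # formula stays the plain Prodigy expression d_coef * num / denom.
    return (d_coef / alpha_grad) * d_numerator / d_denom

assert abs(d_hat_old(2.0, 5.0, 3.0, 7.0) - d_hat_new(2.0, 5.0, 3.0, 7.0)) < 1e-12
```

A side benefit of the construction-time placement is that the step loop no longer carries a Simplified_AdEMAMix special case, which appears to be why the runtime branch below was dropped.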
```diff
@@ -456,12 +459,6 @@ class Prodigy_adv(torch.optim.Optimizer):
 
         d_hat = self.d
         if global_d_denom > 0:
-            if self.Simplified_AdEMAMix and g_group['alpha_grad'] > 0:
-                # A simple and effective hack to make prodigy compatible with Simplified_AdEMAMix large step sizes
-                # by diving by alpha_grad we make sure that d_numerator that was influenced by (alpha_grad * grad)
-                # are now normalized by /alpha_grad. this is a heuristic way since the update is also influenced by
-                # the increasing and decaying accumulator but it's effective and it worked for me (for Lora/Finetune).
-                global_d_numerator /= g_group['alpha_grad']
             d_hat = d_coef * global_d_numerator / global_d_denom
             if self.d == g_group['d0']:
                 self.d = max(self.d, d_hat)
```
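For callers, the visible effect of 1.0.0 is that combining Simplified_AdEMAMix with alpha_grad > 0 no longer relies on a step-time hack; the rescaling happens once when the optimizer is built. A hedged usage sketch follows; the import path is inferred from the file list above and the keyword names from the __init__ hunk, so treat the exact signature as an assumption rather than documented API:

```python
# Hedged usage sketch -- not taken from the package docs. The import path and
# keyword names below are assumptions based on the diff; verify against the
# package README before relying on them.
import torch
from adv_optm.optim.Prodigy_adv import Prodigy_adv  # module path per the file list

model = torch.nn.Linear(16, 4)

opt = Prodigy_adv(
    model.parameters(),
    lr=1.0,                    # Prodigy-style optimizers are typically run with lr near 1.0
    Simplified_AdEMAMix=True,  # assumed keyword, mirrors the flag seen in the hunks
    alpha_grad=5.0,            # assumed keyword; the value here is purely illustrative
)

# Standard PyTorch training step; in 1.0.0 no manual adjustment of d_coef is
# needed when alpha_grad > 0, since __init__ divides d_coef by alpha_grad itself.
loss = model(torch.randn(8, 16)).pow(2).mean()
loss.backward()
opt.step()
opt.zero_grad()
```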