PyPI - heavyball - Versions diffs - 0.24.3__py3-none-any.whl → 0.25.0__py3-none-any.whl - Mend

heavyball 0.24.3__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

heavyball/foreach_soap.py CHANGED Viewed

@@ -71,12 +71,11 @@ class ForeachSOAP(StatefulOptimizer):
         # Decay the first and second moment running average coefficient
         # In-place operations to update the averages at the same time
         step_tensor = torch.empty((), dtype=torch.int32, device=p_list[0].device).fill_(step)
-        denom = exp_avg_(exp_avg, exp_avg_sq, grad, grad_projected, beta1, beta2, step_tensor)
         step_size = -group["lr"] * min(step / group['warmup_steps'], 1)
-        for p, g, ea, d in zip(p_list, grad, exp_avg, denom):
-            state = self.state_(p)
+        for p, g, gp, ea, eas in zip(p_list, grad, grad_projected, exp_avg, exp_avg_sq):
+            d = exp_avg_(ea, eas, g, gp, beta1, beta2, step_tensor)[0]
             # Projecting the exponential moving average of gradients to the eigenbases of Shampoo's preconditioner
             # i.e. projecting to the eigenbases of matrices in state['GG']
             exp_avg_projected = project(ea, state['Q'], False)

heavyball/palm_foreach_soap.py CHANGED Viewed

@@ -81,11 +81,10 @@ class PaLMForeachSOAP(StatefulOptimizer):
         # In-place operations to update the averages at the same time
         beta2 = torch.empty((), dtype=torch.float32, device=p_list[0].device).fill_(beta2)
         step_tensor = torch.empty((), dtype=torch.int32, device=p_list[0].device).fill_(step)
-        denom = exp_avg_(exp_avg, exp_avg_sq, grad, grad_projected, beta1, beta2, step_tensor)
         step_size = -group["lr"] * min(step / group['warmup_steps'], 1)
-        for p, g, ea, d in zip(p_list, grad, exp_avg, denom):
-            state = self.state_(p)
+        for p, g, gp, ea, eas in zip(p_list, grad, grad_projected, exp_avg, exp_avg_sq):
+            d = exp_avg_(ea, eas, g, gp, beta1, beta2, step_tensor)[0]
             # Projecting the exponential moving average of gradients to the eigenbases of Shampoo's preconditioner
             # i.e. projecting to the eigenbases of matrices in state['GG']
             exp_avg_projected = project(ea, state['Q'], False)

heavyball/precond_schedule_foreach_soap.py CHANGED Viewed

@@ -73,12 +73,12 @@ class PrecondScheduleForeachSOAP(StatefulOptimizer):
         # Decay the first and second moment running average coefficient
         # In-place operations to update the averages at the same time
         step_tensor = torch.empty((), dtype=torch.int32, device=p_list[0].device).fill_(step)
-        denom = exp_avg_(exp_avg, exp_avg_sq, grad, grad_projected, beta1, beta2, step_tensor)
         update_precond = precond_schedule(step, group['precond_scheduler'], self.rng)
         step_size = -group["lr"] * min(step / group['warmup_steps'], 1)
-        for p, g, ea, d in zip(p_list, grad, exp_avg, denom):
+        for p, g, gp, ea, eas in zip(p_list, grad, grad_projected, exp_avg, exp_avg_sq):
+            d = exp_avg_(ea, eas, g, gp, beta1, beta2, step_tensor)[0]
             state = self.state_(p)
             # Projecting the exponential moving average of gradients to the eigenbases of Shampoo's preconditioner
             # i.e. projecting to the eigenbases of matrices in state['GG']

heavyball/precond_schedule_palm_foreach_soap.py CHANGED Viewed

@@ -84,12 +84,12 @@ class PrecondSchedulePaLMForeachSOAP(StatefulOptimizer):
         # In-place operations to update the averages at the same time
         beta2 = torch.empty((), dtype=torch.float32, device=p_list[0].device).fill_(beta2)
         step_tensor = torch.empty((), dtype=torch.int32, device=p_list[0].device).fill_(step)
-        denom = exp_avg_(exp_avg, exp_avg_sq, grad, grad_projected, beta1, beta2, step_tensor)
         update_precond = precond_schedule(step, group['precond_scheduler'], self.rng)
         step_size = -group["lr"] * min(step / group['warmup_steps'], 1)
-        for p, g, ea, d in zip(p_list, grad, exp_avg, denom):
+        for p, g, gp, ea, eas in zip(p_list, grad, grad_projected, exp_avg, exp_avg_sq):
+            d = exp_avg_(ea, eas, g, gp, beta1, beta2, step_tensor)[0]
             state = self.state_(p)
             # Projecting the exponential moving average of gradients to the eigenbases of Shampoo's preconditioner
             # i.e. projecting to the eigenbases of matrices in state['GG']

heavyball/utils.py CHANGED Viewed

@@ -492,22 +492,20 @@ class StatefulOptimizer(torch.optim.Optimizer):
         super().__init__(params, {**defaults, 'foreach': foreach})
         self.fake_groups = {}
         self.use_ema = use_ema
-    def key(self, param: Tensor):
-        return (param.data_ptr(), tuple(param.shape))
+        self.mapping = {}
     def get_groups(self, group):
         if group['foreach']:
             return [group]
         for p in group['params']:
-            if self.key(p) not in self.fake_groups:
-                self.fake_groups[self.key(p)] = {**group, 'params': [p]}
+            if p not in self.fake_groups:
+                self.fake_groups[p] = {**group, 'params': [p]}
-        return [self.fake_groups[self.key(p)] for p in group['params']]
+        return [self.fake_groups[p] for p in group['params']]
     def state_(self, arg: Tensor):
-        return self.state[self.key(arg)]
+        return self.state[self.mapping.get(arg, arg)]
     def mars_correct_list(self, group, p_list, g_list, mars_gamma, beta):
         for p, g in zip(p_list, g_list):
@@ -538,6 +536,8 @@ class StatefulOptimizer(torch.optim.Optimizer):
             p_views = merge_group(group, p)
             if grad is not None:
                 grad = merge_group(group, grad)
+            for i, pv in enumerate(p_views):
+                self.mapping[pv] = (p, i)
             if isinstance(p_views, Tensor):
                 yield p_views, grad
                 continue
@@ -622,11 +622,14 @@ class StatefulOptimizer(torch.optim.Optimizer):
             for top_group in self.param_groups:
                 for group in self.get_groups(top_group):
                     self._step(group)
+                    self.mapping.clear()
                     if self.use_ema:
                         self.ema_update(group)
         return loss
 class ScheduleFree(StatefulOptimizer):
     def eval(self):
         for group in self.param_groups:

{heavyball-0.24.3.dist-info → heavyball-0.25.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: heavyball
-Version: 0.24.3
+Version: 0.25.0
 Summary: Efficient optimizers
 Home-page: https://github.com/clashluke/heavyball
 Author: Lucas Nestler

{heavyball-0.24.3.dist-info → heavyball-0.25.0.dist-info}/RECORD RENAMED Viewed

@@ -6,19 +6,19 @@ heavyball/foreach_adamw.py,sha256=K4xTes4drylAqaqWky8O_Bg_mmbAmcHZ5DEBs5vMD-s,28
 heavyball/foreach_adopt.py,sha256=fHnbEqvKKc5IKPDWC9Qo9PiISSjj1MEViy0Jb3BRgZQ,3582
 heavyball/foreach_laprop.py,sha256=EXkwFQ-H7hHWLmiNUsxUcmXhzNNLMjieHjfOlY_6kmo,2868
 heavyball/foreach_sfadamw.py,sha256=TeWf0nKXQEFcz02rADYRJenDM9mX1dGHhvILLks6OW8,3087
-heavyball/foreach_soap.py,sha256=408jRysE9ek0ea-TphhSBMTa9zcjkgMX3qlx8qTCt34,4803
+heavyball/foreach_soap.py,sha256=Tgwg4_Sir9nI_3R85f8NMQagquUBJmAEMQqh0uD3b0Y,4771
 heavyball/p_adam.py,sha256=qEcuU8VEc35vaWAXjT0O65vfCuNn_3ttwL4RlJKN3Xw,6389
 heavyball/palm_foreach_sfadamw.py,sha256=1qOr-uniSmI1sNCJc1SnvyKH5iFu80Z6H5h93lDTwcE,3410
-heavyball/palm_foreach_soap.py,sha256=cExM9nTC3zAgsRr42VOIMWNwYA4dAJaA8-pIo7SWilc,6230
-heavyball/precond_schedule_foreach_soap.py,sha256=EL_Z-v5l7BC98QgI-Zg9iyM77TAreVgD5Zln59ewGoI,4966
-heavyball/precond_schedule_palm_foreach_soap.py,sha256=HWo2t7yY-_n4pPGmDiELccy0jdELTVhdlH-eyFBih5k,6502
+heavyball/palm_foreach_soap.py,sha256=zSjpYYm1hfgIudjo_q3ozu3Vkfhz8w8im1c-ou1U3sI,6198
+heavyball/precond_schedule_foreach_soap.py,sha256=p7oD2bESyCPsdGkJYhHluraDb_1K5Q28RNL6fIvD5C8,4969
+heavyball/precond_schedule_palm_foreach_soap.py,sha256=Sb3Fhv-EG28_oXnbVpE0iHe5R8i5_hltqoi_DgPuoEU,6505
 heavyball/precond_schedule_sfpsoap.py,sha256=KUKdZzd336w24zPRcqwRatj7IVmd1Us0a_VuzASluIo,7565
 heavyball/psgd_kron.py,sha256=PtTe6eR547Y-4CvgjpchgkQsr_kWr4AN-uY9L_JO_C8,6088
 heavyball/pure_psgd.py,sha256=344NdVNHwUFX3fU2R1S_Xh9SXAML3E4ryHr7xfMh9Cc,5076
 heavyball/schedule_free_palm_foreach_soap.py,sha256=KTQY37MZH7YnOSTLKY8uVySUXxWXbFVUA1QXN3iv8Ds,7244
-heavyball/utils.py,sha256=AxhcHzbFAvhTgTFyIcdxs9TJkH4AgVEaNeBRjOLzoBM,40095
-heavyball-0.24.3.dist-info/LICENSE,sha256=CGdGJim64YifGmUVPaeyRsxkvyExtClswhRNIp8FY_U,1322
-heavyball-0.24.3.dist-info/METADATA,sha256=32T-Q-a4k096KjxoR-3DQt25XpO_h0zs7lWKTDQLugI,11926
-heavyball-0.24.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-heavyball-0.24.3.dist-info/top_level.txt,sha256=SzCxSVg_qCUPA4kZObW3Zyo4v-d_mMOD-p7a-WXTl2E,10
-heavyball-0.24.3.dist-info/RECORD,,
+heavyball/utils.py,sha256=QAHOZj__Kn1vpSSBm6otfKb656bqoHUyZXrVJrB_23U,40145
+heavyball-0.25.0.dist-info/LICENSE,sha256=CGdGJim64YifGmUVPaeyRsxkvyExtClswhRNIp8FY_U,1322
+heavyball-0.25.0.dist-info/METADATA,sha256=T649SIhfWXSJVTRXJnYLctkD1fQJl95r05Mrhdw8nck,11926
+heavyball-0.25.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+heavyball-0.25.0.dist-info/top_level.txt,sha256=SzCxSVg_qCUPA4kZObW3Zyo4v-d_mMOD-p7a-WXTl2E,10
+heavyball-0.25.0.dist-info/RECORD,,

{heavyball-0.24.3.dist-info → heavyball-0.25.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{heavyball-0.24.3.dist-info → heavyball-0.25.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{heavyball-0.24.3.dist-info → heavyball-0.25.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

heavyball 0.24.3__py3-none-any.whl → 0.25.0__py3-none-any.whl

heavyball 0.24.3__py3-none-any.whl → 0.25.0__py3-none-any.whl