PyPI - heavyball - Versions diffs - 0.23.4__py3-none-any.whl → 0.24.0__py3-none-any.whl - Mend

heavyball 0.23.4__py3-none-any.whl → 0.24.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

heavyball/cached_delayed_psgd_kron.py +1 -1
heavyball/cached_psgd_kron.py +1 -1
heavyball/delayed_psgd.py +1 -1
heavyball/foreach_adamw.py +2 -2
heavyball/foreach_adopt.py +2 -2
heavyball/foreach_laprop.py +2 -2
heavyball/foreach_sfadamw.py +2 -2
heavyball/foreach_soap.py +2 -2
heavyball/p_adam.py +2 -2
heavyball/palm_foreach_sfadamw.py +2 -2
heavyball/palm_foreach_soap.py +2 -2
heavyball/precond_schedule_foreach_soap.py +2 -2
heavyball/precond_schedule_palm_foreach_soap.py +2 -2
heavyball/precond_schedule_sfpsoap.py +2 -2
heavyball/psgd_kron.py +1 -1
heavyball/schedule_free_palm_foreach_soap.py +1 -1
heavyball/utils.py +2 -2
{heavyball-0.23.4.dist-info → heavyball-0.24.0.dist-info}/METADATA +1 -1
heavyball-0.24.0.dist-info/RECORD +24 -0
heavyball-0.23.4.dist-info/RECORD +0 -24
{heavyball-0.23.4.dist-info → heavyball-0.24.0.dist-info}/LICENSE +0 -0
{heavyball-0.23.4.dist-info → heavyball-0.24.0.dist-info}/WHEEL +0 -0
{heavyball-0.23.4.dist-info → heavyball-0.24.0.dist-info}/top_level.txt +0 -0

heavyball/cached_delayed_psgd_kron.py CHANGED Viewed

@@ -86,7 +86,7 @@ class ForeachCachedDelayedPSGDKron(PSGDBase):
             state = self.state_(p)
             if 'Q' not in state:
-                state["exp_avg"] = torch.zeros_like(g, dtype=storage_dtype)
+                state["exp_avg"] = torch.zeros_like(g, dtype=storage_dtype, memory_format=torch.preserve_format)
                 Q, state["exprs"] = init_Q_exprs(p, precond_init_scale, max_size_triangular, min_ndim_triangular,
                                                  memory_save_mode, dtype=q_dtype)
                 state['Q'] = triu_to_line(Q) if store_triu_as_line else Q

heavyball/cached_psgd_kron.py CHANGED Viewed

@@ -83,7 +83,7 @@ class ForeachCachedPSGDKron(PSGDBase):
             state = self.state_(p)
             if 'Q' not in state:
-                state["exp_avg"] = torch.zeros_like(g, dtype=storage_dtype)
+                state["exp_avg"] = torch.zeros_like(g, dtype=storage_dtype, memory_format=torch.preserve_format)
                 Q, state["exprs"] = init_Q_exprs(p, precond_init_scale, max_size_triangular, min_ndim_triangular,
                                                  memory_save_mode, dtype=q_dtype)
                 state['Q'] = triu_to_line(Q) if store_triu_as_line else Q

heavyball/delayed_psgd.py CHANGED Viewed

@@ -89,7 +89,7 @@ class ForeachDelayedPSGD(PSGDBase):
             state = self.state_(p)
             if 'Q' not in state:
-                state["exp_avg"] = torch.zeros_like(g, dtype=storage_dtype)
+                state["exp_avg"] = torch.zeros_like(g, dtype=storage_dtype, memory_format=torch.preserve_format)
                 Q, state["exprs"] = init_Q_exprs(p, precond_init_scale, max_size_triangular, min_ndim_triangular,
                                                  memory_save_mode, dtype=q_dtype)
                 state["Q"] = triu_to_line(Q) if store_triu_as_line else Q

heavyball/foreach_adamw.py CHANGED Viewed

@@ -45,8 +45,8 @@ class ForeachAdamW(StatefulOptimizer):
         for p in active_p:
             if 'exp_avg' not in self.state_(p):
-                self.state_(p)['exp_avg'] = torch.zeros_like(p.data, dtype=storage_dtype)
-                self.state_(p)['exp_avg_sq'] = torch.zeros_like(p.data, dtype=storage_dtype)
+                self.state_(p)['exp_avg'] = torch.zeros_like(p.data, dtype=storage_dtype, memory_format=torch.preserve_format)
+                self.state_(p)['exp_avg_sq'] = torch.zeros_like(p.data, dtype=storage_dtype, memory_format=torch.preserve_format)
         y, grad, exp_avg_sq, exp_avg = zip(
             *[(p.data, p.grad, self.state_(p)['exp_avg_sq'], self.state_(p)['exp_avg']) for p in active_p])

heavyball/foreach_adopt.py CHANGED Viewed

@@ -51,8 +51,8 @@ class ForeachADOPT(StatefulOptimizer):
         for p in active_p:
             if 'exp_avg' not in self.state_(p):
-                self.state_(p)['exp_avg'] = torch.zeros_like(p.data, dtype=storage_dtype)
-                self.state_(p)['exp_avg_sq'] = torch.zeros_like(p.data, dtype=storage_dtype)
+                self.state_(p)['exp_avg'] = torch.zeros_like(p.data, dtype=storage_dtype, memory_format=torch.preserve_format)
+                self.state_(p)['exp_avg_sq'] = torch.zeros_like(p.data, dtype=storage_dtype, memory_format=torch.preserve_format)
         y, grad, exp_avg_sq, exp_avg = zip(
             *[(p.data, p.grad, self.state_(p)['exp_avg_sq'], self.state_(p)['exp_avg']) for p in active_p])

heavyball/foreach_laprop.py CHANGED Viewed

@@ -47,8 +47,8 @@ class ForeachLaProp(StatefulOptimizer):
         for p in active_p:
             if 'exp_avg' not in self.state_(p):
-                self.state_(p)['exp_avg'] = torch.zeros_like(p.data, dtype=storage_dtype)
-                self.state_(p)['exp_avg_sq'] = torch.zeros_like(p.data, dtype=storage_dtype)
+                self.state_(p)['exp_avg'] = torch.zeros_like(p.data, dtype=storage_dtype, memory_format=torch.preserve_format)
+                self.state_(p)['exp_avg_sq'] = torch.zeros_like(p.data, dtype=storage_dtype, memory_format=torch.preserve_format)
         y, grad, exp_avg_sq, exp_avg = zip(
             *[(p.data, p.grad, self.state_(p)['exp_avg_sq'], self.state_(p)['exp_avg'])  #

heavyball/foreach_sfadamw.py CHANGED Viewed

@@ -50,8 +50,8 @@ class ForeachSFAdamW(ScheduleFree):
         for p in active_p:
             if 'z' not in self.state_(p):
-                self.state_(p)['z'] = torch.clone(p.data)
-                self.state_(p)['exp_avg_sq'] = torch.zeros_like(p.data, dtype=storage_dtype)
+                self.state_(p)['z'] = torch.clone(p.data, memory_format=torch.preserve_format)
+                self.state_(p)['exp_avg_sq'] = torch.zeros_like(p.data, dtype=storage_dtype, memory_format=torch.preserve_format)
         y, grad, exp_avg_sq, z = zip(*[(p.data, p.grad, self.state_(p)['exp_avg_sq'], self.state_(p)['z'])  #
                                        for p in active_p])

heavyball/foreach_soap.py CHANGED Viewed

@@ -48,8 +48,8 @@ class ForeachSOAP(StatefulOptimizer):
             step = state['step'] = state.get("step", -1) + 1
             if "exp_avg" not in state:
-                state["exp_avg"] = torch.zeros_like(g, dtype=torch.float32)
-                state["exp_avg_sq"] = torch.zeros_like(g, dtype=torch.float32)
+                state["exp_avg"] = torch.zeros_like(g, dtype=torch.float32, memory_format=torch.preserve_format)
+                state["exp_avg_sq"] = torch.zeros_like(g, dtype=torch.float32, memory_format=torch.preserve_format)
                 init_preconditioner(g, state, max_precond_dim, precondition_1d)
                 update_preconditioner(g, state, max_precond_dim, precondition_1d, 0, True)
                 continue  # first step is skipped so that we never use the current gradients in the projection.

heavyball/p_adam.py CHANGED Viewed

@@ -81,8 +81,8 @@ class ForeachPaLMPAdam(PSGDBase):
             state = self.state_(p)
             if 'Q' not in state:
-                state['exp_avg'] = torch.zeros_like(g, dtype=storage_dtype)
-                state['exp_avg_sq'] = torch.zeros_like(g, dtype=storage_dtype)
+                state['exp_avg'] = torch.zeros_like(g, dtype=storage_dtype, memory_format=torch.preserve_format)
+                state['exp_avg_sq'] = torch.zeros_like(g, dtype=storage_dtype, memory_format=torch.preserve_format)
                 Q, state["exprs"] = init_Q_exprs(p, precond_init_scale, max_size_triangular, min_ndim_triangular,
                                                  memory_save_mode, dtype=q_dtype)
                 state['Q'] = triu_to_line(Q) if store_triu_as_line else Q

heavyball/palm_foreach_sfadamw.py CHANGED Viewed

@@ -54,8 +54,8 @@ class PaLMForeachSFAdamW(ScheduleFree):
         for p in active_p:
             if 'z' not in self.state_(p):
-                self.state_(p)['z'] = torch.clone(p.data)
-                self.state_(p)['exp_avg_sq'] = torch.zeros_like(p.data, dtype=storage_dtype)
+                self.state_(p)['z'] = torch.clone(p.data, memory_format=torch.preserve_format)
+                self.state_(p)['exp_avg_sq'] = torch.zeros_like(p.data, dtype=storage_dtype, memory_format=torch.preserve_format)
         # Decay the first moment running average coefficient
         beta2 = 1 - (k + 1) ** -group['beta2_scale']

heavyball/palm_foreach_soap.py CHANGED Viewed

@@ -56,8 +56,8 @@ class PaLMForeachSOAP(StatefulOptimizer):
             step = state['step'] = state.get("step", -1) + 1
             if "exp_avg" not in state:
-                state["exp_avg"] = torch.zeros_like(g, dtype=torch.float32)
-                state["exp_avg_sq"] = torch.zeros_like(g, dtype=torch.float32)
+                state["exp_avg"] = torch.zeros_like(g, dtype=torch.float32, memory_format=torch.preserve_format)
+                state["exp_avg_sq"] = torch.zeros_like(g, dtype=torch.float32, memory_format=torch.preserve_format)
                 init_preconditioner(g, state, max_precond_dim, precondition_1d)
                 update_preconditioner(g, state, max_precond_dim, precondition_1d, 0, True)
                 continue  # first step is skipped so that we never use the current gradients in the projection.

heavyball/precond_schedule_foreach_soap.py CHANGED Viewed

@@ -50,8 +50,8 @@ class PrecondScheduleForeachSOAP(StatefulOptimizer):
             step = state['step'] = state.get("step", -1) + 1
             if "exp_avg" not in state:
-                state["exp_avg"] = torch.zeros_like(g, dtype=torch.float32)
-                state["exp_avg_sq"] = torch.zeros_like(g, dtype=torch.float32)
+                state["exp_avg"] = torch.zeros_like(g, dtype=torch.float32, memory_format=torch.preserve_format)
+                state["exp_avg_sq"] = torch.zeros_like(g, dtype=torch.float32, memory_format=torch.preserve_format)
                 init_preconditioner(g, state, max_precond_dim, precondition_1d)
                 update_preconditioner(g, state, max_precond_dim, precondition_1d, 0, True)
                 continue  # first step is skipped so that we never use the current gradients in the projection.

heavyball/precond_schedule_palm_foreach_soap.py CHANGED Viewed

@@ -58,8 +58,8 @@ class PrecondSchedulePaLMForeachSOAP(StatefulOptimizer):
             step = state['step'] = state.get("step", -1) + 1
             if "exp_avg" not in state:
-                state["exp_avg"] = torch.zeros_like(g, dtype=torch.float32)
-                state["exp_avg_sq"] = torch.zeros_like(g, dtype=torch.float32)
+                state["exp_avg"] = torch.zeros_like(g, dtype=torch.float32, memory_format=torch.preserve_format)
+                state["exp_avg_sq"] = torch.zeros_like(g, dtype=torch.float32, memory_format=torch.preserve_format)
                 init_preconditioner(g, state, max_precond_dim, precondition_1d)
                 update_preconditioner(g, state, max_precond_dim, precondition_1d, 0, True)
                 continue  # first step is skipped so that we never use the current gradients in the projection.

heavyball/precond_schedule_sfpsoap.py CHANGED Viewed

@@ -96,8 +96,8 @@ class PrecondScheduleSFPaLMSOAP(ScheduleFree):
             state = self.state_(p)
             if "z" not in state:
-                state["z"] = torch.clone(p.data)
-                state["exp_avg_sq"] = torch.zeros_like(g, dtype=torch.float32)
+                state["z"] = torch.clone(p.data, memory_format=torch.preserve_format)
+                state["exp_avg_sq"] = torch.zeros_like(g, dtype=torch.float32, memory_format=torch.preserve_format)
                 init_preconditioner(g, state, max_precond_dim, precondition_1d)
                 update_preconditioner(g, state, max_precond_dim, precondition_1d, 0, True)
                 continue  # first step is skipped so that we never use the current gradients in the projection.

heavyball/psgd_kron.py CHANGED Viewed

@@ -84,7 +84,7 @@ class ForeachPSGDKron(PSGDBase):
             state = self.state_(p)
             if 'Q' not in state:
-                state["exp_avg"] = torch.zeros_like(g, dtype=storage_dtype)
+                state["exp_avg"] = torch.zeros_like(g, dtype=storage_dtype, memory_format=torch.preserve_format)
                 Q, state["exprs"] = init_Q_exprs(p, precond_init_scale, max_size_triangular, min_ndim_triangular,
                                                  memory_save_mode, dtype=q_dtype)
                 state['Q'] = triu_to_line(Q) if store_triu_as_line else Q

heavyball/schedule_free_palm_foreach_soap.py CHANGED Viewed

@@ -90,7 +90,7 @@ class SFPaLMForeachSOAP(ScheduleFree):
             if "z" not in state:
                 state["z"] = torch.clone(p).float()
-                state["exp_avg_sq"] = torch.zeros_like(g, dtype=torch.float32)
+                state["exp_avg_sq"] = torch.zeros_like(g, dtype=torch.float32, memory_format=torch.preserve_format)
                 if mars:
                     state['mars_prev_grad'] = g.clone()
                 init_preconditioner(g, state, max_precond_dim, precondition_1d)

heavyball/utils.py CHANGED Viewed

@@ -60,11 +60,11 @@ def warmup(lr: float, step: int, warmup_steps: int):
 @decorator_knowngood
 def _compilable_schedule_free_(p: List[Tensor], z: List[Tensor], ckp1: Tensor, grad: List[Tensor], lr: Tensor,
                                beta1: Tensor):
-    p32, z32, g32 = [promote(x) for x in (p, z, grad)]
+    p32, z32, g32 = [list(map(promote, x)) for x in (p, z, grad)]
     for p_, z_, g_ in zip(p32, z32, g32):
         p_.lerp_(z_, ckp1)
         p_.add_(g_, alpha=lr * (beta1 * (1 - ckp1) - 1))
-        z_.add(g_, alpha=-lr)
+        z_.add_(g_, alpha=-lr)
     copy_stochastic_list_(p, p32)
     copy_stochastic_list_(z, z32)

{heavyball-0.23.4.dist-info → heavyball-0.24.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: heavyball
-Version: 0.23.4
+Version: 0.24.0
 Summary: Efficient optimizers
 Home-page: https://github.com/clashluke/heavyball
 Author: Lucas Nestler

heavyball-0.24.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,24 @@
+heavyball/__init__.py,sha256=icHYN-MGsmHkLUlHCMcZkOlwY7GT63_ayR_a5iPKmzM,2226
+heavyball/cached_delayed_psgd_kron.py,sha256=cHwVDq-_284_eMt09rAq26D_8fv3N0e0wdN1woCHU1M,6864
+heavyball/cached_psgd_kron.py,sha256=ttg6bemNDRpCJBV3aJg2DSyVfsfTMZAnhErgwC2jXlw,6815
+heavyball/delayed_psgd.py,sha256=yHy83YQ_PKWtwQq1R_OVyj3cjmcbsZAXX1M-hGyciss,6332
+heavyball/foreach_adamw.py,sha256=K4xTes4drylAqaqWky8O_Bg_mmbAmcHZ5DEBs5vMD-s,2860
+heavyball/foreach_adopt.py,sha256=fHnbEqvKKc5IKPDWC9Qo9PiISSjj1MEViy0Jb3BRgZQ,3582
+heavyball/foreach_laprop.py,sha256=EXkwFQ-H7hHWLmiNUsxUcmXhzNNLMjieHjfOlY_6kmo,2868
+heavyball/foreach_sfadamw.py,sha256=TeWf0nKXQEFcz02rADYRJenDM9mX1dGHhvILLks6OW8,3087
+heavyball/foreach_soap.py,sha256=408jRysE9ek0ea-TphhSBMTa9zcjkgMX3qlx8qTCt34,4803
+heavyball/p_adam.py,sha256=qEcuU8VEc35vaWAXjT0O65vfCuNn_3ttwL4RlJKN3Xw,6389
+heavyball/palm_foreach_sfadamw.py,sha256=1qOr-uniSmI1sNCJc1SnvyKH5iFu80Z6H5h93lDTwcE,3410
+heavyball/palm_foreach_soap.py,sha256=cExM9nTC3zAgsRr42VOIMWNwYA4dAJaA8-pIo7SWilc,6230
+heavyball/precond_schedule_foreach_soap.py,sha256=EL_Z-v5l7BC98QgI-Zg9iyM77TAreVgD5Zln59ewGoI,4966
+heavyball/precond_schedule_palm_foreach_soap.py,sha256=HWo2t7yY-_n4pPGmDiELccy0jdELTVhdlH-eyFBih5k,6502
+heavyball/precond_schedule_sfpsoap.py,sha256=KUKdZzd336w24zPRcqwRatj7IVmd1Us0a_VuzASluIo,7565
+heavyball/psgd_kron.py,sha256=PtTe6eR547Y-4CvgjpchgkQsr_kWr4AN-uY9L_JO_C8,6088
+heavyball/pure_psgd.py,sha256=344NdVNHwUFX3fU2R1S_Xh9SXAML3E4ryHr7xfMh9Cc,5076
+heavyball/schedule_free_palm_foreach_soap.py,sha256=KTQY37MZH7YnOSTLKY8uVySUXxWXbFVUA1QXN3iv8Ds,7244
+heavyball/utils.py,sha256=12DfrpBDiHAdFxN3cA3BA9tcailHw8wl5QTzEn4As98,39677
+heavyball-0.24.0.dist-info/LICENSE,sha256=CGdGJim64YifGmUVPaeyRsxkvyExtClswhRNIp8FY_U,1322
+heavyball-0.24.0.dist-info/METADATA,sha256=ZL_FTyrobNlPxhtgdVH6kZ9aD_jIq0SGALp7jo0BbiI,11926
+heavyball-0.24.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+heavyball-0.24.0.dist-info/top_level.txt,sha256=SzCxSVg_qCUPA4kZObW3Zyo4v-d_mMOD-p7a-WXTl2E,10
+heavyball-0.24.0.dist-info/RECORD,,

heavyball-0.23.4.dist-info/RECORD DELETED Viewed

@@ -1,24 +0,0 @@
-heavyball/__init__.py,sha256=icHYN-MGsmHkLUlHCMcZkOlwY7GT63_ayR_a5iPKmzM,2226
-heavyball/cached_delayed_psgd_kron.py,sha256=n3wIOhrop0Ls4MZ0kXpwGuImp1jzPs6VGdxIlPyoYdQ,6827
-heavyball/cached_psgd_kron.py,sha256=KCLsfvj9qh_2FNwRTdWM3zjnt2oGHfsf4Y341rPcceI,6778
-heavyball/delayed_psgd.py,sha256=xaAPNqE5Pg476fqXjST11Bi0zrZ8KjjU5h_NPUdwlZk,6295
-heavyball/foreach_adamw.py,sha256=IdcP5ggNB2SVDK3iNrNKGTGlEwWn18H77ClqCnJGB74,2786
-heavyball/foreach_adopt.py,sha256=NzHYoeiq1pFKn1RPHiVG2vJsHES30Blh5v2ypOWP2uQ,3508
-heavyball/foreach_laprop.py,sha256=myb0uwC-oZqYqeVSozas2JNMlbUkLCAMrVB9ZP4QOKQ,2794
-heavyball/foreach_sfadamw.py,sha256=B8xyL8Qxul4G1rsxMv8ZMlkYh1gaTpeCvCgkubaBAhE,3013
-heavyball/foreach_soap.py,sha256=7B_dP2Hm_xqwpBQiPYkv_c6eoRnU1dV2VZfvSoa4uJ8,4729
-heavyball/p_adam.py,sha256=8BlZ6YoaDXawMiRbCxo0Kd5_0-pAn0MQIhL0LHNaRBs,6315
-heavyball/palm_foreach_sfadamw.py,sha256=QzNXZOXEH6ufEPbnPg8ixn19WpVr4OhDreqnxIwcBVM,3336
-heavyball/palm_foreach_soap.py,sha256=IknGm_CzrqDIFEoCkejxjoZ4sfIy6RSoInqlMUOYLB4,6156
-heavyball/precond_schedule_foreach_soap.py,sha256=bJ2ifPFa8zEP9GO8eBpqZzsmP7p_iQkkCkllNeEMHPU,4892
-heavyball/precond_schedule_palm_foreach_soap.py,sha256=4dT9f134-Faq2KuCMCHzMtrkMO-es5p_DYS1of5yF-s,6428
-heavyball/precond_schedule_sfpsoap.py,sha256=ey-mUIjAy9ny5vJac0vRZHUXgef1bc7u7_-4hRkM4Rs,7491
-heavyball/psgd_kron.py,sha256=4eiGPXAFjvGIXLdiai1UJfAvTozAV1TXaE9UGkE4BLc,6051
-heavyball/pure_psgd.py,sha256=344NdVNHwUFX3fU2R1S_Xh9SXAML3E4ryHr7xfMh9Cc,5076
-heavyball/schedule_free_palm_foreach_soap.py,sha256=irvlIXF-oABpWWycZPMV-JG9XTiXSlgHtrM-ygfATic,7207
-heavyball/utils.py,sha256=FFZLqq_bnQUDXOMBO_hBu32yNMHi18W13wxlOJ0Q_78,39665
-heavyball-0.23.4.dist-info/LICENSE,sha256=CGdGJim64YifGmUVPaeyRsxkvyExtClswhRNIp8FY_U,1322
-heavyball-0.23.4.dist-info/METADATA,sha256=ebfSVWG2CeKxSfE5Ru0VipLE23DQiQKOmODVdlFW4aY,11926
-heavyball-0.23.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-heavyball-0.23.4.dist-info/top_level.txt,sha256=SzCxSVg_qCUPA4kZObW3Zyo4v-d_mMOD-p7a-WXTl2E,10
-heavyball-0.23.4.dist-info/RECORD,,

{heavyball-0.23.4.dist-info → heavyball-0.24.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{heavyball-0.23.4.dist-info → heavyball-0.24.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{heavyball-0.23.4.dist-info → heavyball-0.24.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

heavyball 0.23.4__py3-none-any.whl → 0.24.0__py3-none-any.whl

heavyball 0.23.4__py3-none-any.whl → 0.24.0__py3-none-any.whl