PyPI - heavyball - Versions diffs - 0.24.2__py3-none-any.whl → 0.24.4__py3-none-any.whl - Mend

heavyball 0.24.2__py3-none-any.whl → 0.24.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

heavyball/cached_delayed_psgd_kron.py +1 -1
heavyball/cached_psgd_kron.py +5 -2
heavyball/delayed_psgd.py +1 -1
heavyball/foreach_soap.py +2 -3
heavyball/palm_foreach_soap.py +2 -3
heavyball/precond_schedule_foreach_soap.py +2 -2
heavyball/precond_schedule_palm_foreach_soap.py +2 -2
heavyball/utils.py +5 -4
{heavyball-0.24.2.dist-info → heavyball-0.24.4.dist-info}/METADATA +1 -1
heavyball-0.24.4.dist-info/RECORD +24 -0
heavyball-0.24.2.dist-info/RECORD +0 -24
{heavyball-0.24.2.dist-info → heavyball-0.24.4.dist-info}/LICENSE +0 -0
{heavyball-0.24.2.dist-info → heavyball-0.24.4.dist-info}/WHEEL +0 -0
{heavyball-0.24.2.dist-info → heavyball-0.24.4.dist-info}/top_level.txt +0 -0

heavyball/cached_delayed_psgd_kron.py CHANGED Viewed

@@ -22,7 +22,7 @@ class ForeachCachedDelayedPSGDKron(PSGDBase):
         params (iterable): Iterable of parameters to optimize or dicts defining
             parameter groups.
         lr (float): Learning rate.
-        b1 (float): Momentum parameter.
+        beta (float): Momentum parameter.
         weight_decay (float): Weight decay (L2 penalty).
         preconditioner_update_probability (callable or float, optional): Probability of
             updating the preconditioner. If None, defaults to a schedule that anneals

heavyball/cached_psgd_kron.py CHANGED Viewed

@@ -19,7 +19,7 @@ class ForeachCachedPSGDKron(PSGDBase):
         params (iterable): Iterable of parameters to optimize or dicts defining
             parameter groups.
         lr (float): Learning rate.
-        b1 (float): Momentum parameter.
+        beta (float): Momentum parameter.
         weight_decay (float): Weight decay (L2 penalty).
         preconditioner_update_probability (callable or float, optional): Probability of
             updating the preconditioner. If None, defaults to a schedule that anneals
@@ -41,6 +41,7 @@ class ForeachCachedPSGDKron(PSGDBase):
                  split: bool = False, clip_fn: Optional[callable] = None, store_triu_as_line: bool = True,
                  foreach: bool = True, q_dtype='float32', stochastic_schedule: bool = True,
                  storage_dtype: str = 'float32', mars: bool = False, caution: bool = False, mars_gamma: float = 0.0025,
+                 orthogonalize_output: bool = False,
                  #
                  # expert parameters
                  precond_init_scale=1.0, precond_lr=0.1):
@@ -59,7 +60,8 @@ class ForeachCachedPSGDKron(PSGDBase):
                         momentum_into_precond_update=momentum_into_precond_update, precond_lr=precond_lr,
                         precond_init_scale=precond_init_scale, step=0, warmup_steps=warmup_steps, merge_dims=merge_dims,
                         split=split, store_triu_as_line=store_triu_as_line, q_dtype=q_dtype,
-                        storage_dtype=storage_dtype, caution=caution, mars_gamma=mars_gamma, mars=mars)
+                        storage_dtype=storage_dtype, caution=caution, mars_gamma=mars_gamma, mars=mars,
+                        orthogonalize_output=orthogonalize_output)
         super().__init__(params, defaults, foreach, stochastic_schedule, clip_fn, preconditioner_update_probability)
     def _step(self, group):
@@ -75,6 +77,7 @@ class ForeachCachedPSGDKron(PSGDBase):
         store_triu_as_line = group['store_triu_as_line']
         q_dtype = getattr(torch, group['q_dtype'])
         storage_dtype = getattr(torch, group['storage_dtype'])
+        orthogonalize_output = group['orthogonalize_output']
         should_update = self.should_update(group)
         vals = []

heavyball/delayed_psgd.py CHANGED Viewed

@@ -25,7 +25,7 @@ class ForeachDelayedPSGD(PSGDBase):
         params (iterable): Iterable of parameters to optimize or dicts defining
             parameter groups.
         lr (float): Learning rate.
-        b1 (float): Momentum parameter.
+        beta (float): Momentum parameter.
         weight_decay (float): Weight decay (L2 penalty).
         preconditioner_update_probability (callable or float, optional): Probability of
             updating the preconditioner. If None, defaults to a schedule that anneals

heavyball/foreach_soap.py CHANGED Viewed

@@ -71,12 +71,11 @@ class ForeachSOAP(StatefulOptimizer):
         # Decay the first and second moment running average coefficient
         # In-place operations to update the averages at the same time
         step_tensor = torch.empty((), dtype=torch.int32, device=p_list[0].device).fill_(step)
-        denom = exp_avg_(exp_avg, exp_avg_sq, grad, grad_projected, beta1, beta2, step_tensor)
         step_size = -group["lr"] * min(step / group['warmup_steps'], 1)
-        for p, g, ea, d in zip(p_list, grad, exp_avg, denom):
-            state = self.state_(p)
+        for p, g, gp, ea, eas in zip(p_list, grad, grad_projected, exp_avg, exp_avg_sq):
+            d = exp_avg_(ea, eas, g, gp, beta1, beta2, step_tensor)[0]
             # Projecting the exponential moving average of gradients to the eigenbases of Shampoo's preconditioner
             # i.e. projecting to the eigenbases of matrices in state['GG']
             exp_avg_projected = project(ea, state['Q'], False)

heavyball/palm_foreach_soap.py CHANGED Viewed

@@ -81,11 +81,10 @@ class PaLMForeachSOAP(StatefulOptimizer):
         # In-place operations to update the averages at the same time
         beta2 = torch.empty((), dtype=torch.float32, device=p_list[0].device).fill_(beta2)
         step_tensor = torch.empty((), dtype=torch.int32, device=p_list[0].device).fill_(step)
-        denom = exp_avg_(exp_avg, exp_avg_sq, grad, grad_projected, beta1, beta2, step_tensor)
         step_size = -group["lr"] * min(step / group['warmup_steps'], 1)
-        for p, g, ea, d in zip(p_list, grad, exp_avg, denom):
-            state = self.state_(p)
+        for p, g, gp, ea, eas in zip(p_list, grad, grad_projected, exp_avg, exp_avg_sq):
+            d = exp_avg_(ea, eas, g, gp, beta1, beta2, step_tensor)[0]
             # Projecting the exponential moving average of gradients to the eigenbases of Shampoo's preconditioner
             # i.e. projecting to the eigenbases of matrices in state['GG']
             exp_avg_projected = project(ea, state['Q'], False)

heavyball/precond_schedule_foreach_soap.py CHANGED Viewed

@@ -73,12 +73,12 @@ class PrecondScheduleForeachSOAP(StatefulOptimizer):
         # Decay the first and second moment running average coefficient
         # In-place operations to update the averages at the same time
         step_tensor = torch.empty((), dtype=torch.int32, device=p_list[0].device).fill_(step)
-        denom = exp_avg_(exp_avg, exp_avg_sq, grad, grad_projected, beta1, beta2, step_tensor)
         update_precond = precond_schedule(step, group['precond_scheduler'], self.rng)
         step_size = -group["lr"] * min(step / group['warmup_steps'], 1)
-        for p, g, ea, d in zip(p_list, grad, exp_avg, denom):
+        for p, g, gp, ea, eas in zip(p_list, grad, grad_projected, exp_avg, exp_avg_sq):
+            d = exp_avg_(ea, eas, g, gp, beta1, beta2, step_tensor)[0]
             state = self.state_(p)
             # Projecting the exponential moving average of gradients to the eigenbases of Shampoo's preconditioner
             # i.e. projecting to the eigenbases of matrices in state['GG']

heavyball/precond_schedule_palm_foreach_soap.py CHANGED Viewed

@@ -84,12 +84,12 @@ class PrecondSchedulePaLMForeachSOAP(StatefulOptimizer):
         # In-place operations to update the averages at the same time
         beta2 = torch.empty((), dtype=torch.float32, device=p_list[0].device).fill_(beta2)
         step_tensor = torch.empty((), dtype=torch.int32, device=p_list[0].device).fill_(step)
-        denom = exp_avg_(exp_avg, exp_avg_sq, grad, grad_projected, beta1, beta2, step_tensor)
         update_precond = precond_schedule(step, group['precond_scheduler'], self.rng)
         step_size = -group["lr"] * min(step / group['warmup_steps'], 1)
-        for p, g, ea, d in zip(p_list, grad, exp_avg, denom):
+        for p, g, gp, ea, eas in zip(p_list, grad, grad_projected, exp_avg, exp_avg_sq):
+            d = exp_avg_(ea, eas, g, gp, beta1, beta2, step_tensor)[0]
             state = self.state_(p)
             # Projecting the exponential moving average of gradients to the eigenbases of Shampoo's preconditioner
             # i.e. projecting to the eigenbases of matrices in state['GG']

heavyball/utils.py CHANGED Viewed

@@ -23,7 +23,8 @@ def decorator(func):
     @functools.wraps(func)
     def _fn(*args, **kwargs):
-        if is_compiling() or compile_mode is None:
+        disable = compile_mode_recommended_to_none is None
+        if is_compiling() or compile_mode_recommended_to_none is None:
             return func(*args, **kwargs)
         nonlocal compiled
         if compiled is None:
@@ -874,7 +875,7 @@ def psgd_lb(A, max_abs):
     return x
-@decorator_knowngood
+@decorator
 def psgd_update_precond(Q, exprs, G, precond_lr, tiny, oq, store_triu_as_line):
     """Update Kronecker product preconditioner Q with pair (V, G)."""
     exprA, exprGs, _ = exprs
@@ -1130,11 +1131,11 @@ def merge_group(group, *tensors):
             'max_precond_dim'], group.get('split', False)))
     return out
 def hook_optimizer_into_model(model, optimizer, *args, **kwargs):
     def _step(p: Tensor, o: torch.optim.Optimizer):
         o.step()
         o.zero_grad()
     for p in model.parameters():
-        p.register_post_accumulate_grad_hook(functools.partial(_step, o=optimizer([p], *args, **kwargs)))
+        p.register_post_accumulate_grad_hook(functools.partial(_step, o=optimizer([p], *args, **kwargs)))

{heavyball-0.24.2.dist-info → heavyball-0.24.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: heavyball
-Version: 0.24.2
+Version: 0.24.4
 Summary: Efficient optimizers
 Home-page: https://github.com/clashluke/heavyball
 Author: Lucas Nestler

heavyball-0.24.4.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,24 @@
+heavyball/__init__.py,sha256=icHYN-MGsmHkLUlHCMcZkOlwY7GT63_ayR_a5iPKmzM,2226
+heavyball/cached_delayed_psgd_kron.py,sha256=HEyT6vW6Le6FmWpf-vAEzgbAkPH2mByqXcVZn07KCMk,6866
+heavyball/cached_psgd_kron.py,sha256=rOgWAeVMENI7kdoBuRo3ywrCeatAnIqBdeYPHuVk2aU,6998
+heavyball/delayed_psgd.py,sha256=L6qRLPxJmJ_1e0Mk2zLYUEVxkt8NGHq6v3HKawlgFcU,6334
+heavyball/foreach_adamw.py,sha256=K4xTes4drylAqaqWky8O_Bg_mmbAmcHZ5DEBs5vMD-s,2860
+heavyball/foreach_adopt.py,sha256=fHnbEqvKKc5IKPDWC9Qo9PiISSjj1MEViy0Jb3BRgZQ,3582
+heavyball/foreach_laprop.py,sha256=EXkwFQ-H7hHWLmiNUsxUcmXhzNNLMjieHjfOlY_6kmo,2868
+heavyball/foreach_sfadamw.py,sha256=TeWf0nKXQEFcz02rADYRJenDM9mX1dGHhvILLks6OW8,3087
+heavyball/foreach_soap.py,sha256=Tgwg4_Sir9nI_3R85f8NMQagquUBJmAEMQqh0uD3b0Y,4771
+heavyball/p_adam.py,sha256=qEcuU8VEc35vaWAXjT0O65vfCuNn_3ttwL4RlJKN3Xw,6389
+heavyball/palm_foreach_sfadamw.py,sha256=1qOr-uniSmI1sNCJc1SnvyKH5iFu80Z6H5h93lDTwcE,3410
+heavyball/palm_foreach_soap.py,sha256=zSjpYYm1hfgIudjo_q3ozu3Vkfhz8w8im1c-ou1U3sI,6198
+heavyball/precond_schedule_foreach_soap.py,sha256=p7oD2bESyCPsdGkJYhHluraDb_1K5Q28RNL6fIvD5C8,4969
+heavyball/precond_schedule_palm_foreach_soap.py,sha256=Sb3Fhv-EG28_oXnbVpE0iHe5R8i5_hltqoi_DgPuoEU,6505
+heavyball/precond_schedule_sfpsoap.py,sha256=KUKdZzd336w24zPRcqwRatj7IVmd1Us0a_VuzASluIo,7565
+heavyball/psgd_kron.py,sha256=PtTe6eR547Y-4CvgjpchgkQsr_kWr4AN-uY9L_JO_C8,6088
+heavyball/pure_psgd.py,sha256=344NdVNHwUFX3fU2R1S_Xh9SXAML3E4ryHr7xfMh9Cc,5076
+heavyball/schedule_free_palm_foreach_soap.py,sha256=KTQY37MZH7YnOSTLKY8uVySUXxWXbFVUA1QXN3iv8Ds,7244
+heavyball/utils.py,sha256=AxhcHzbFAvhTgTFyIcdxs9TJkH4AgVEaNeBRjOLzoBM,40095
+heavyball-0.24.4.dist-info/LICENSE,sha256=CGdGJim64YifGmUVPaeyRsxkvyExtClswhRNIp8FY_U,1322
+heavyball-0.24.4.dist-info/METADATA,sha256=oksy8cvcHSdMEs9Mzv7WDAVkkkUPcpA1uYNlUgZM_bk,11926
+heavyball-0.24.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+heavyball-0.24.4.dist-info/top_level.txt,sha256=SzCxSVg_qCUPA4kZObW3Zyo4v-d_mMOD-p7a-WXTl2E,10
+heavyball-0.24.4.dist-info/RECORD,,

heavyball-0.24.2.dist-info/RECORD DELETED Viewed

@@ -1,24 +0,0 @@
-heavyball/__init__.py,sha256=icHYN-MGsmHkLUlHCMcZkOlwY7GT63_ayR_a5iPKmzM,2226
-heavyball/cached_delayed_psgd_kron.py,sha256=cHwVDq-_284_eMt09rAq26D_8fv3N0e0wdN1woCHU1M,6864
-heavyball/cached_psgd_kron.py,sha256=ttg6bemNDRpCJBV3aJg2DSyVfsfTMZAnhErgwC2jXlw,6815
-heavyball/delayed_psgd.py,sha256=yHy83YQ_PKWtwQq1R_OVyj3cjmcbsZAXX1M-hGyciss,6332
-heavyball/foreach_adamw.py,sha256=K4xTes4drylAqaqWky8O_Bg_mmbAmcHZ5DEBs5vMD-s,2860
-heavyball/foreach_adopt.py,sha256=fHnbEqvKKc5IKPDWC9Qo9PiISSjj1MEViy0Jb3BRgZQ,3582
-heavyball/foreach_laprop.py,sha256=EXkwFQ-H7hHWLmiNUsxUcmXhzNNLMjieHjfOlY_6kmo,2868
-heavyball/foreach_sfadamw.py,sha256=TeWf0nKXQEFcz02rADYRJenDM9mX1dGHhvILLks6OW8,3087
-heavyball/foreach_soap.py,sha256=408jRysE9ek0ea-TphhSBMTa9zcjkgMX3qlx8qTCt34,4803
-heavyball/p_adam.py,sha256=qEcuU8VEc35vaWAXjT0O65vfCuNn_3ttwL4RlJKN3Xw,6389
-heavyball/palm_foreach_sfadamw.py,sha256=1qOr-uniSmI1sNCJc1SnvyKH5iFu80Z6H5h93lDTwcE,3410
-heavyball/palm_foreach_soap.py,sha256=cExM9nTC3zAgsRr42VOIMWNwYA4dAJaA8-pIo7SWilc,6230
-heavyball/precond_schedule_foreach_soap.py,sha256=EL_Z-v5l7BC98QgI-Zg9iyM77TAreVgD5Zln59ewGoI,4966
-heavyball/precond_schedule_palm_foreach_soap.py,sha256=HWo2t7yY-_n4pPGmDiELccy0jdELTVhdlH-eyFBih5k,6502
-heavyball/precond_schedule_sfpsoap.py,sha256=KUKdZzd336w24zPRcqwRatj7IVmd1Us0a_VuzASluIo,7565
-heavyball/psgd_kron.py,sha256=PtTe6eR547Y-4CvgjpchgkQsr_kWr4AN-uY9L_JO_C8,6088
-heavyball/pure_psgd.py,sha256=344NdVNHwUFX3fU2R1S_Xh9SXAML3E4ryHr7xfMh9Cc,5076
-heavyball/schedule_free_palm_foreach_soap.py,sha256=KTQY37MZH7YnOSTLKY8uVySUXxWXbFVUA1QXN3iv8Ds,7244
-heavyball/utils.py,sha256=FglgQfiE206I07rql3qP-X2C1j0hY3N5VcQwKUh08aA,40025
-heavyball-0.24.2.dist-info/LICENSE,sha256=CGdGJim64YifGmUVPaeyRsxkvyExtClswhRNIp8FY_U,1322
-heavyball-0.24.2.dist-info/METADATA,sha256=lTThJQbW6qbnQqy9lGlTTOttJcX5vfQ_s6Cm0arqfC8,11926
-heavyball-0.24.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-heavyball-0.24.2.dist-info/top_level.txt,sha256=SzCxSVg_qCUPA4kZObW3Zyo4v-d_mMOD-p7a-WXTl2E,10
-heavyball-0.24.2.dist-info/RECORD,,

{heavyball-0.24.2.dist-info → heavyball-0.24.4.dist-info}/LICENSE RENAMED Viewed

File without changes

{heavyball-0.24.2.dist-info → heavyball-0.24.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{heavyball-0.24.2.dist-info → heavyball-0.24.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

heavyball 0.24.2__py3-none-any.whl → 0.24.4__py3-none-any.whl

heavyball 0.24.2__py3-none-any.whl → 0.24.4__py3-none-any.whl