heavyball 0.18.7-py3-none-any.whl → 0.18.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
heavyball/foreach_adamw.py CHANGED
@@ -1,7 +1,21 @@
  import torch
  import torch.optim
+ from heavyball.utils import copy_stochastic_list_

- from .utils import warmup, exp_avg_sq_, beta_debias, update_param_, StatefulOptimizer
+ from .utils import warmup, exp_avg_sq_, beta_debias, update_param_, StatefulOptimizer, promote
+
+
+ @torch.compile(mode='max-autotune-no-cudagraphs', fullgraph=True, dynamic=True)
+ def _compilable_step_(y, grad, exp_avg_sq, exp_avg, beta1, beta2, step, lr, eps, decay):
+     g32, exp_avg32, exp_avg_sq32 = [list(map(promote, x)) for x in [grad, exp_avg, exp_avg_sq]]
+
+     torch._foreach_lerp_(exp_avg32, g32, 1 - beta_debias(beta1, step + 1))
+     denom = list(exp_avg_sq_(exp_avg_sq32, g32, beta_debias(beta2, step + 1), eps))
+
+     update_param_(y, exp_avg32, lr, decay, lambda p, e, l: p.addcdiv_(e, denom.pop(0), value=l))
+
+     copy_stochastic_list_(exp_avg, exp_avg32)
+     copy_stochastic_list_(exp_avg_sq, exp_avg_sq32)


  class ForeachAdamW(StatefulOptimizer):
@@ -30,13 +44,11 @@ class ForeachAdamW(StatefulOptimizer):
  self.state_(p)['exp_avg_sq'] = torch.zeros_like(p.data, dtype=torch.float32)

  y, grad, exp_avg_sq, exp_avg = zip(
-     *[(p.data, p.grad.float(), self.state_(p)['exp_avg_sq'], self.state_(p)['exp_avg']) for p in active_p])
-
- # Decay the first and second moment running average coefficient
- torch._foreach_lerp_(exp_avg, grad, 1 - beta_debias(group['betas'][0], k + 1))
- denom = list(exp_avg_sq_(exp_avg_sq, grad, beta_debias(group['betas'][1], k + 1), eps))
+     *[(p.data, p.grad, self.state_(p)['exp_avg_sq'], self.state_(p)['exp_avg']) for p in active_p])

- # Normalize grad in-place for memory efficiency
  lr = -warmup(group['lr'], k + 1, group['warmup_steps'])
- update_param_(y, exp_avg, lr, decay, lambda p, e, l: p.addcdiv_(e, denom.pop(0), value=l))
+ lr = torch.empty((), dtype=torch.float32, device=y[0].device).fill_(lr)
+ step = torch.empty((), dtype=torch.int32, device=y[0].device).fill_(k)
+ _compilable_step_(y, grad, exp_avg_sq, exp_avg, group['betas'][0], group['betas'][1], step, lr, eps, decay)
+
  group['k'] = k + 1
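
The step above wraps lr and step in 0-dim tensors on the parameters' device before calling the torch.compile'd _compilable_step_, so these scalars become inputs to the compiled graph rather than Python constants specialized into it. A minimal sketch of the same idea, with illustrative names (_fused_add_, params, updates are not heavyball API):

    import torch

    @torch.compile(mode='max-autotune-no-cudagraphs', fullgraph=True, dynamic=True)
    def _fused_add_(params, updates, lr):
        # lr arrives as a 0-dim float32 tensor, so its value is a graph input
        # instead of a constant traced into the compiled code
        for p, u in zip(params, updates):
            p.add_(u * lr)

    params = [torch.zeros(4)]
    updates = [torch.ones(4)]
    lr = torch.empty((), dtype=torch.float32, device=params[0].device).fill_(-1e-3)
    _fused_add_(params, updates, lr)  # later steps can refill lr in place
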
heavyball/foreach_adopt.py CHANGED
@@ -1,7 +1,27 @@
  import torch
  import torch.optim
+ from heavyball.utils import copy_stochastic_list_

- from .utils import warmup, beta_debias, update_param_, StatefulOptimizer
+ from .utils import warmup, beta_debias, update_param_, StatefulOptimizer, promote
+
+
+ @torch.compile(mode='max-autotune-no-cudagraphs', fullgraph=True, dynamic=True)
+ def _compilable_step_(y, grad, exp_avg_sq, exp_avg, beta1, beta2, step, lr, eps, decay):
+     g32, exp_avg32, exp_avg_sq32 = [list(map(promote, x)) for x in [grad, exp_avg, exp_avg_sq]]
+     update_param_(y, exp_avg, lr, decay)
+
+     beta1 = beta_debias(beta1, step)
+     denom = torch._foreach_sqrt(exp_avg_sq32)
+     torch._foreach_maximum_(denom, eps)
+     torch._foreach_mul_(exp_avg32, beta1)
+     [ea32.addcdiv_(g, d, value=1 - beta1) for ea32, g, d in zip(exp_avg32, g32, denom)]
+
+     beta2 = beta_debias(beta2, step + 1)
+     torch._foreach_mul_(exp_avg_sq32, beta2)
+     [eas32.addcmul_(g, g, value=1 - beta2) for eas32, g in zip(exp_avg_sq32, g32)]
+
+     copy_stochastic_list_(exp_avg, exp_avg32)
+     copy_stochastic_list_(exp_avg_sq, exp_avg_sq32)


  class ForeachADOPT(StatefulOptimizer):
@@ -31,12 +51,18 @@ class ForeachADOPT(StatefulOptimizer):
  self.state_(p)['exp_avg_sq'] = torch.zeros_like(p.data, dtype=torch.float32)

  y, grad, exp_avg_sq, exp_avg = zip(
-     *[(p.data, p.grad.float(), self.state_(p)['exp_avg_sq'], self.state_(p)['exp_avg']) for p in active_p])
+     *[(p.data, p.grad, self.state_(p)['exp_avg_sq'], self.state_(p)['exp_avg']) for p in active_p])
+
+ group['k'] = k + 1

  if k > 1:
  lr = -warmup(group['lr'], k - 1, group['warmup_steps'])
+ lr = torch.empty((), dtype=torch.float32, device=y[0].device).fill_(lr)
+ k = torch.empty((), dtype=torch.int32, device=y[0].device).fill_(k)
+ _compilable_step_(y, grad, exp_avg_sq, exp_avg, group['betas'][0], group['betas'][1], k, lr, eps, decay)
+ return

- update_param_(y, exp_avg, lr, decay)
+ grad = [promote(g) for g in grad]
  if k > 0:
  beta1 = beta_debias(group['betas'][0], k)
  denom = torch._foreach_sqrt(exp_avg_sq)
@@ -48,5 +74,3 @@ class ForeachADOPT(StatefulOptimizer):
  torch._foreach_mul_(exp_avg_sq, beta2)
  torch._foreach_addcmul_(exp_avg_sq, grad, grad, value=1 - beta2)
  del grad
-
- group['k'] = k + 1
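
As in the AdamW hunk above, the compiled ADOPT step promotes gradients and moment buffers to float32 with promote, does the arithmetic in float32, and writes the moments back through copy_stochastic_list_. A rough sketch of that round trip for a single bfloat16 buffer (buffer names are illustrative; promote and copy_stochastic_list_ are the heavyball helpers shown in this diff):

    import torch
    from heavyball.utils import promote, copy_stochastic_list_

    exp_avg = [torch.zeros(4, dtype=torch.bfloat16)]
    grad = [torch.randn(4, dtype=torch.bfloat16)]

    # promote returns a float32 copy for bf16/fp16 inputs, the tensor itself otherwise
    exp_avg32 = [promote(t) for t in exp_avg]
    g32 = [promote(g) for g in grad]

    # do the math in float32
    torch._foreach_lerp_(exp_avg32, g32, 0.1)

    # store the float32 result back into the low-precision buffer
    copy_stochastic_list_(exp_avg, exp_avg32)
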
heavyball/foreach_laprop.py CHANGED
@@ -1,7 +1,20 @@
  import torch
  import torch.optim

- from .utils import warmup, exp_avg_sq_, beta_debias, update_param_, StatefulOptimizer
+ from .utils import warmup, exp_avg_sq_, beta_debias, update_param_, StatefulOptimizer, promote
+
+
+ @torch.compile(mode='max-autotune-no-cudagraphs', fullgraph=True, dynamic=True)
+ def _compilable_step_(y, grad, exp_avg_sq, exp_avg, beta1, beta2, step, lr, eps, decay):
+     g32, exp_avg32, exp_avg_sq32 = [list(map(promote, x)) for x in [grad, exp_avg, exp_avg_sq]]
+
+     denom = exp_avg_sq_(exp_avg_sq32, g32, beta_debias(beta2, step), eps)
+
+     beta1 = beta_debias(beta1, step)
+     torch._foreach_mul_(exp_avg32, beta1)
+     [ea32.addcdiv_(g, d, value=1 - beta1) for ea32, g, d in zip(exp_avg32, g32, denom)]
+
+     update_param_(y, exp_avg32, lr, decay)


  class ForeachLaProp(StatefulOptimizer):
@@ -31,17 +44,13 @@ class ForeachLaProp(StatefulOptimizer):
  self.state_(p)['exp_avg_sq'] = torch.zeros_like(p.data, dtype=torch.float32)

  y, grad, exp_avg_sq, exp_avg = zip(
-     *[(p.data, p.grad.float(), self.state_(p)['exp_avg_sq'], self.state_(p)['exp_avg']) for p in active_p])
-
- # Decay the first and second moment running average coefficient
- denom = exp_avg_sq_(exp_avg_sq, grad, beta_debias(group['betas'][1], k + 1), eps)
- beta1 = beta_debias(group['betas'][0], k + 1)
- torch._foreach_mul_(exp_avg, beta1)
- torch._foreach_addcdiv_(exp_avg, grad, denom, 1 - beta1)
- del grad
+     *[(p.data, p.grad, self.state_(p)['exp_avg_sq'], self.state_(p)['exp_avg']) #
+       for p in active_p])

- # Normalize grad in-place for memory efficiency
  lr = -warmup(group['lr'], k + 1, group['warmup_steps'])
- update_param_(y, exp_avg, lr, decay)
+ lr = torch.empty((), dtype=torch.float32, device=y[0].device).fill_(lr)
+ step = torch.empty((), dtype=torch.int32, device=y[0].device).fill_(k + 1)
+
+ _compilable_step_(y, grad, exp_avg_sq, exp_avg, group['betas'][0], group['betas'][1], step, lr, eps, decay)

  group['k'] = k + 1
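
For orientation, LaProp differs from Adam in the order of operations: the gradient is first normalized by the second-moment estimate and only then averaged into the first moment, which is applied to the parameter directly; that is the sequence in the compiled step above. A single-tensor sketch in plain Python, with bias correction (beta_debias) and the float32 promotion omitted for brevity:

    import torch

    def laprop_single(p, g, exp_avg, exp_avg_sq, lr=1e-3, beta1=0.9, beta2=0.99, eps=1e-8):
        # second moment: running average of squared gradients
        exp_avg_sq.mul_(beta2).addcmul_(g, g, value=1 - beta2)
        denom = exp_avg_sq.sqrt().clamp_(min=eps)
        # first moment averages the *normalized* gradient (the LaProp twist)
        exp_avg.mul_(beta1).addcdiv_(g, denom, value=1 - beta1)
        # the parameter update uses the first moment directly
        p.add_(exp_avg, alpha=-lr)

    p = torch.zeros(4)
    laprop_single(p, torch.randn(4), torch.zeros(4), torch.zeros(4))
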
heavyball/foreach_sfadamw.py CHANGED
@@ -1,7 +1,23 @@
  import torch
  import torch.optim
+ from heavyball.utils import get_ckp1

- from .utils import schedule_free_, warmup, ScheduleFree, exp_avg_sq_, beta_debias
+ from .utils import warmup, ScheduleFree, exp_avg_sq_, beta_debias, promote, _compilable_schedule_free_
+
+
+ @torch.compile(mode='max-autotune-no-cudagraphs', fullgraph=True, dynamic=True)
+ def _compilable_step_(y, grad, exp_avg_sq, z, beta1, beta2, step, ckp1, eps, decay, lr):
+     old_debiased2 = beta_debias(beta2, step)
+
+     g32 = [promote(g_) for g_ in grad]
+     exp_avg_sq32 = [promote(e_) for e_ in exp_avg_sq]
+
+     denom = exp_avg_sq_(exp_avg_sq32, g32, old_debiased2, eps)
+     torch._foreach_div_(g32, denom)
+     if decay != 0:
+         torch._foreach_add_(g32, y, alpha=decay)
+     for p, z_, g in zip(y, z, g32):
+         _compilable_schedule_free_(p, z_, ckp1, g, lr, beta1)


  class ForeachSFAdamW(ScheduleFree):
@@ -31,24 +47,14 @@ class ForeachSFAdamW(ScheduleFree):
  self.state_(p)['z'] = torch.clone(p.data)
  self.state_(p)['exp_avg_sq'] = torch.zeros_like(p.data, dtype=torch.float32)

- y, grad, exp_avg_sq, z = zip(
-     *[(p.data, p.grad.float(), self.state_(p)['exp_avg_sq'], self.state_(p)['z']) for p in active_p])
-
- # Decay the first moment running average coefficient
- old_debiased = beta_debias(group['betas'][1], k + 1)
-
- # Decay the first and second moment running average coefficient
- denom = exp_avg_sq_(exp_avg_sq, grad, old_debiased, eps)
-
- # Normalize grad in-place for memory efficiency
- torch._foreach_div_(grad, denom)
-
- # Weight decay calculated at y
- if decay != 0:
-     torch._foreach_add_(grad, y, alpha=decay)
+ y, grad, exp_avg_sq, z = zip(*[(p.data, p.grad, self.state_(p)['exp_avg_sq'], self.state_(p)['z']) #
+                                for p in active_p])

  lr = warmup(group['lr'], k + 1, group['warmup_steps'])
- group['weight_sum'] = schedule_free_(lr, group['weight_lr_power'], group['weight_sum'], group['betas'][0], y, z,
-                                      grad, group['r'], k + 1)
+ ckp1, group['weight_sum'] = get_ckp1(lr, group['weight_lr_power'], group['weight_sum'], group['r'], k + 1)

+ step = torch.empty((), dtype=torch.int32, device=y[0].device).fill_(k + 1)
+ ckp1 = torch.empty((), dtype=torch.float32, device=y[0].device).fill_(ckp1)
+ lr = torch.empty((), dtype=torch.float32, device=y[0].device).fill_(lr)
+ _compilable_step_(y, grad, exp_avg_sq, z, group['betas'][0], group['betas'][1], step, ckp1, eps, decay, lr)
  group['k'] = k + 1
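
The schedule_free_ call is gone: the interpolation weight is now computed on the host by get_ckp1 (added to heavyball/utils.py below) and handed to the compiled step as a 0-dim tensor alongside lr and step. For intuition, with a constant post-warmup lr and the default r=0.0 the returned weight reduces to 1/step:

    from heavyball.utils import get_ckp1

    weight_sum = 0.0
    for step in range(1, 5):
        ckp1, weight_sum = get_ckp1(lr=1e-3, weight_lr_power=2.0, weight_sum=weight_sum, r=0.0, step=step)
        print(step, ckp1)  # 1.0, 0.5, 0.333..., 0.25
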
heavyball/palm_foreach_sfadamw.py CHANGED
@@ -1,13 +1,27 @@
  import torch
  import torch.optim

- from .utils import schedule_free_, warmup, ScheduleFree, exp_avg_sq_, beta_debias
+ from .utils import schedule_free_, warmup, ScheduleFree, exp_avg_sq_, beta_debias, get_ckp1, promote, _compilable_schedule_free_
+
+
+ @torch.compile(mode='max-autotune-no-cudagraphs', fullgraph=True, dynamic=True)
+ def _compilable_step_(y, grad, exp_avg_sq, z, beta1, beta2, step, ckp1, eps, decay, lr):
+     old_debiased2 = beta_debias(beta2, step)
+
+     g32 = [promote(g_) for g_ in grad]
+     exp_avg_sq32 = [promote(e_) for e_ in exp_avg_sq]
+
+     denom = exp_avg_sq_(exp_avg_sq32, g32, old_debiased2, eps)
+     torch._foreach_div_(g32, denom)
+     if decay != 0:
+         torch._foreach_add_(g32, y, alpha=decay)
+     for p, z_, g in zip(y, z, g32):
+         _compilable_schedule_free_(p, z_, ckp1, g, lr, beta1)


  class PaLMForeachSFAdamW(ScheduleFree):
  def __init__(self, params, lr=0.0025, beta=0.9, betas=(None, None), eps=1e-8, weight_decay=0, warmup_steps=0, r=0.0,
-              weight_lr_power=2.0, beta2_scale: float = 0.8,
-              foreach: bool = True):
+              weight_lr_power=2.0, beta2_scale: float = 0.8, foreach: bool = True):
  if betas[0] is not None:
  beta = betas[0]
  defaults = dict(lr=lr, beta=beta, eps=eps, r=r, k=0, warmup_steps=warmup_steps, train_mode=True, weight_sum=0.0,
@@ -33,25 +47,18 @@ class PaLMForeachSFAdamW(ScheduleFree):
  self.state_(p)['z'] = torch.clone(p.data)
  self.state_(p)['exp_avg_sq'] = torch.zeros_like(p.data, dtype=torch.float32)

- y, grad, exp_avg_sq, z = zip(
-     *[(p.data, p.grad.float(), self.state_(p)['exp_avg_sq'], self.state_(p)['z']) for p in active_p])
-
  # Decay the first moment running average coefficient
  beta2 = 1 - (k + 1) ** -group['beta2_scale']
- old_debiased = beta_debias(beta2, k + 1)
-
- # Decay the first and second moment running average coefficient
- denom = exp_avg_sq_(exp_avg_sq, grad, old_debiased, eps)
-
- # Normalize grad in-place for memory efficiency
- torch._foreach_div_(grad, denom)

- # Weight decay calculated at y
- if decay != 0:
-     torch._foreach_add_(grad, y, alpha=decay)
+ y, grad, exp_avg_sq, z = zip(*[(p.data, p.grad, self.state_(p)['exp_avg_sq'], self.state_(p)['z']) #
+                                for p in active_p])

  lr = warmup(group['lr'], k + 1, group['warmup_steps'])
- group['weight_sum'] = schedule_free_(lr, group['weight_lr_power'], group['weight_sum'], group['beta'], y, z,
-                                      grad, group['r'], k + 1)
+ ckp1, group['weight_sum'] = get_ckp1(lr, group['weight_lr_power'], group['weight_sum'], group['r'], k + 1)

+ step = torch.empty((), dtype=torch.int32, device=y[0].device).fill_(k + 1)
+ ckp1 = torch.empty((), dtype=torch.float32, device=y[0].device).fill_(ckp1)
+ beta2 = torch.empty((), dtype=torch.float32, device=y[0].device).fill_(beta2)
+ lr = torch.empty((), dtype=torch.float32, device=y[0].device).fill_(lr)
+ _compilable_step_(y, grad, exp_avg_sq, z, group['beta'], beta2, step, ckp1, eps, decay, lr)
  group['k'] = k + 1
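
The PaLM variant keeps its step-dependent second-moment coefficient, beta2 = 1 - (k + 1) ** -beta2_scale, and now also ships it into the compiled step as a 0-dim tensor. With the default beta2_scale=0.8 the schedule starts at 0 and approaches 1, roughly:

    beta2_scale = 0.8
    for step in (1, 10, 100, 1000):
        print(step, round(1 - step ** -beta2_scale, 4))
    # 1 -> 0.0, 10 -> 0.8415, 100 -> 0.9749, 1000 -> 0.996
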
heavyball/psgd_kron.py CHANGED
@@ -104,7 +104,8 @@ class ForeachPSGDKron(PSGDBase):

  if should_update:
  q32 = [promote(q_) for q_ in q]
- self.do_update(group, [p], [g], [q32], precond_lr, [q_orig], store_triu_as_line)
+ self.do_update(group, [p], [ea if momentum_into_precond_update else g], [q32], precond_lr, [q_orig],
+                store_triu_as_line)
  set_(g, psgd_precond_grad(q, self.state_(p)["exprs"], ea))

  grad_list = self.clip_fn(grad_list)
heavyball/utils.py CHANGED
@@ -40,14 +40,25 @@ def warmup(lr: float, step: int, warmup_steps: int):

  @torch.compile(mode='max-autotune-no-cudagraphs', fullgraph=True, dynamic=True)
  def _compilable_schedule_free_(p, z, ckp1, grad, lr, beta1):
-     p32 = p.float()
-     z32 = z.float()
-     p32.lerp_(end=z32, weight=1 - ckp1)
+     p32 = promote(p)
+     z32 = promote(z)
+     p32.lerp_(end=z32, weight=ckp1)
      p32.add_(grad, alpha=lr * (beta1 * (1 - ckp1) - 1))
-     _guarded_copy_stochastic(p, p32)
+     copy_stochastic_(p, p32)

      z32.add_(grad, alpha=-lr)
-     _guarded_copy_stochastic(z, z32)
+     copy_stochastic_(z, z32)
+
+
+ def get_ckp1(lr, weight_lr_power, weight_sum, r, step):
+     weight = lr ** weight_lr_power * max(step, 1) ** r
+     weight_sum = weight_sum + weight
+
+     try:
+         ckp1 = weight / weight_sum
+     except ZeroDivisionError:
+         ckp1 = 0
+     return ckp1, weight_sum


  def schedule_free_(lr: float, weight_lr_power: float, weight_sum: float, beta1: float, parameters: List[torch.Tensor],
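
Two changes in this hunk: _compilable_schedule_free_ now interpolates with weight ckp1 directly (the removed line used 1 - ckp1), and the scalar bookkeeping that schedule_free_ used to perform is factored out into get_ckp1. In update form, with y the evaluation point and z the base iterate, the function computes y = (1 - ckp1) * y + ckp1 * z, then y += lr * (beta1 * (1 - ckp1) - 1) * grad and z -= lr * grad. A single-tensor restatement in eager mode, omitting the stochastic bfloat16 write-back:

    import torch

    def schedule_free_single(y, z, grad, lr, beta1, ckp1):
        y.lerp_(end=z, weight=ckp1)                        # pull y toward the base iterate z
        y.add_(grad, alpha=lr * (beta1 * (1 - ckp1) - 1))  # gradient step at y
        z.add_(grad, alpha=-lr)                            # plain step on z

    y, z = torch.zeros(4), torch.zeros(4)
    schedule_free_single(y, z, torch.randn(4), lr=1e-3, beta1=0.9, ckp1=0.5)
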
@@ -136,7 +147,7 @@ def exp_avg_sq_(state, grad, beta2, eps, out=None):
          return torch.sqrt(state, out=out).clamp_(min=eps)

      torch._foreach_mul_(state, beta2)
-     torch._foreach_addcmul_(state, grad, grad, value=1 - beta2)
+     [s.addcmul_(g, g, value=1 - beta2) for s, g in zip(state, grad)]
      denom = torch._foreach_sqrt(state)
      torch._foreach_maximum_(denom, eps)
      return denom
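
exp_avg_sq_ maintains the running average of squared gradients in place and returns max(sqrt(state), eps), which the optimizers above use as the Adam-style denominator; the only change here is swapping the fused _foreach_addcmul_ for per-tensor addcmul_ calls. A minimal eager-mode usage sketch in the list-of-tensors form:

    import torch
    from heavyball.utils import exp_avg_sq_

    state = [torch.zeros(4)]
    grad = [torch.randn(4)]
    denom = exp_avg_sq_(state, grad, 0.99, 1e-8)
    # state now holds 0.01 * grad**2, and denom is its element-wise sqrt clamped at eps
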
@@ -332,9 +343,9 @@ def compute_ggt(grad, GG, max_precond_dim, precondition_1d, beta):


  def promote(x):
-     if x in (torch.bfloat16, torch.float16):
+     if isinstance(x, torch.dtype) and x in (torch.bfloat16, torch.float16):
          return torch.float32
-     if hasattr(x, 'dtype') and x.dtype in (torch.bfloat16, torch.float16):
+     if isinstance(x, torch.Tensor) and x.dtype in (torch.bfloat16, torch.float16):
          return x.float()
      return x

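promote now uses isinstance checks; as the body above shows, half-precision dtypes map to torch.float32, half-precision tensors are upcast via .float(), and anything else is returned unchanged. For example:

    import torch
    from heavyball.utils import promote

    assert promote(torch.bfloat16) is torch.float32                             # dtype in, dtype out
    assert promote(torch.zeros(2, dtype=torch.float16)).dtype is torch.float32  # tensor upcast to fp32
    t = torch.zeros(2)
    assert promote(t) is t                                                       # float32 passes through
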
@@ -486,13 +497,8 @@ def copy_stochastic_list_(target: List[torch.Tensor], source: List[torch.Tensor]
      copy_stochastic_(t, s)


- def _guarded_copy_stochastic(target: torch.Tensor, source: torch.Tensor):
-     if target.dtype != torch.bfloat16 or source.dtype not in (torch.float16, torch.float32, torch.float64):
-         set_(target, source)
-     _compilable_copy_stochastic_(target, source)
-
-
- @torch.compile(mode='max-autotune-no-cudagraphs', fullgraph=True, dynamic=True)
+ # this can be dynamic for most optimizers - just not for PSGD. So, it's disabled for all
+ @torch.compile(mode='max-autotune-no-cudagraphs', fullgraph=True)
  def _compilable_copy_stochastic_(target: torch.Tensor, source: torch.Tensor):
      """Taken as-is from https://github.com/pytorch/pytorch/issues/120376#issuecomment-1974828905"""
      # create a random 16 bit integer
@@ -509,22 +515,24 @@ def _compilable_copy_stochastic_(target: torch.Tensor, source: torch.Tensor):


  def copy_stochastic_(target: torch.Tensor, source: torch.Tensor):
-     if target.data_ptr() == source.data_ptr():
+     if not torch.compiler.is_compiling() and target.data_ptr() == source.data_ptr():
          return
-     _guarded_copy_stochastic(target, source)
+     if target.dtype != torch.bfloat16 or source.dtype not in (torch.float16, torch.float32, torch.float64):
+         set_(target, source)
+     _compilable_copy_stochastic_(target, source)


  @torch.compile(mode='max-autotune-no-cudagraphs', fullgraph=True, dynamic=True)
  def _compilable_update_one_(p, u, decay, add_fn, lr):
-     p32 = p.float()
-     u32 = u.view(p.shape).float()
+     p32 = promote(p)
+     u32 = promote(u.view(p.shape))
      if decay > 0:
          p32.mul_(1 - decay * lr)
      if add_fn is None:
          p32.add_(u32, alpha=lr)
      else:
          add_fn(p32, u32, lr)
-     _guarded_copy_stochastic(p, p32)
+     copy_stochastic_(p, p32)


  def update_param_(param: List[torch.Tensor], update: List[torch.Tensor], lr: float, decay: float,
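
copy_stochastic_ (through the compiled _compilable_copy_stochastic_) stores a float32 value into a bfloat16 buffer with stochastic rounding: a random 16-bit integer is added to the bits that bfloat16 discards, so the result rounds up with probability proportional to the discarded fraction. The docstring credits pytorch/pytorch#120376; a close paraphrase of that approach, for reference only (not heavyball's exact code):

    import torch

    def stochastic_round_to_bf16_(target: torch.Tensor, source: torch.Tensor):
        # one random 16-bit integer per element
        noise = torch.randint_like(source, 0, 1 << 16, dtype=torch.int32)
        # reinterpret the float32 bits and add the noise to the low 16 bits ...
        bits = source.view(dtype=torch.int32) + noise
        # ... then zero them out again (-65536 == 0xFFFF0000 as int32)
        bits.bitwise_and_(-65536)
        # the surviving high 16 bits are exactly a bfloat16 value
        target.copy_(bits.view(dtype=torch.float32))

    src = torch.randn(8)
    dst = torch.empty(8, dtype=torch.bfloat16)
    stochastic_round_to_bf16_(dst, src)
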
heavyball-0.18.8.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: heavyball
- Version: 0.18.7
+ Version: 0.18.8
  Summary: Efficient optimizers
  Home-page: https://github.com/clashluke/heavyball
  Author: Lucas Nestler
heavyball-0.18.8.dist-info/RECORD CHANGED
@@ -2,23 +2,23 @@ heavyball/__init__.py,sha256=iqP428JWwwx-XDOZ0nUdbCkOLEyfoqVyWZLQLAcwxaw,2214
  heavyball/cached_delayed_psgd_kron.py,sha256=PQAER6UgVh5l87DGRZrJ8CVP9UhyCG5wJD9rPLnj_G8,6460
  heavyball/cached_psgd_kron.py,sha256=GaeneBp0irksCSBIrJY4D_0hCpZ-uSRPMhqVX_a-og8,6417
  heavyball/delayed_psgd.py,sha256=fhBWFLTSl1S2gHWCeYak-STaXRwpC56sWZGLFMKFEJM,5589
- heavyball/foreach_adamw.py,sha256=CTg7rfUmlTSjihD5KY9xP0sT2dUKZyZ4-2V42Vlr28U,1780
- heavyball/foreach_adopt.py,sha256=ogOw2JjwEQNj7AKlweAphQFdMJ_GcMDm-RyDvEzugoc,1911
- heavyball/foreach_laprop.py,sha256=yGVmGqWiSw8Y2Xj70ndkR8ZMygakTB4_iRwV02Svkqg,1816
- heavyball/foreach_sfadamw.py,sha256=15-n6-lx4PAHYsKYmXbugxsR5MnqaPYy2vUudPRiitg,2087
+ heavyball/foreach_adamw.py,sha256=kluVzZquZII0NdakfubPOBJRubsavKpphlu6yrx3zks,2320
+ heavyball/foreach_adopt.py,sha256=lOFrw4kCLmDacN3AVPnlb8ZLFENhDCsC-Vvig_LJEK0,3086
+ heavyball/foreach_laprop.py,sha256=3lDrjPpHsUzSjR6rPA_FVNpZg2mh2zHleEnLZt11h5A,2224
+ heavyball/foreach_sfadamw.py,sha256=pk7oI1nFQ8zo9M3Icn2c_GJ7EhqJt5plqa6aopBwljg,2553
  heavyball/foreach_soap.py,sha256=h6ptMch7oaynvu3eIJtWnVXypDA_5JDVm3Zb3PNEma0,4634
  heavyball/p_adam.py,sha256=4zJDGJrpgUyVzr3GiELETFre4xr3-PE10OuAZj-jFM8,5883
- heavyball/palm_foreach_sfadamw.py,sha256=yvZbPyjDW8qd3r4qDXb6uTr5RozQ7JSDj4aYYRnKGLA,2248
+ heavyball/palm_foreach_sfadamw.py,sha256=QzSudBWBA8nfO-T3bhXpi7uqxyA5mULqUwbOnELY8-M,2834
  heavyball/palm_foreach_soap.py,sha256=g4hbiGRcti-J-a0SwAkP4ii5pU-aalsZH5bssyhroLk,5938
  heavyball/precond_schedule_foreach_soap.py,sha256=WLg5SzpJnKPZUvFyIvdwSZa1Umt5cpr3Kow_42orM-E,4863
  heavyball/precond_schedule_palm_foreach_soap.py,sha256=ammQrvRZFF-wc-wEiPEoFhS_7b8pdV61QfcLoQfimSo,6211
  heavyball/precond_schedule_sfpsoap.py,sha256=vq7jd302refKPa_9X2lkOTOtCCcTBVByPdojklrY8pA,6770
- heavyball/psgd_kron.py,sha256=u46dorOUXx-do1IYeno2wj-6l1zYKMQQC-N2Zr2PzLI,5476
+ heavyball/psgd_kron.py,sha256=wKjtI56iUnL5D8DseW60kxiXTAlMYNEf52CrvQaQMnI,5547
  heavyball/pure_psgd.py,sha256=iUy7mMKWxwNiVUMYrQ7SBnreu3t_XSbnhTW3a1yw4m0,4835
  heavyball/schedule_free_palm_foreach_soap.py,sha256=zkcikH5wWbzq4kOrmBjilvY3iWzuUddcv2HNEPKr3MI,6366
- heavyball/utils.py,sha256=lKIV11qvlHITK7lwaScGbP1ryCmInse9Fe64t0OBmQQ,31408
- heavyball-0.18.7.dist-info/LICENSE,sha256=CGdGJim64YifGmUVPaeyRsxkvyExtClswhRNIp8FY_U,1322
- heavyball-0.18.7.dist-info/METADATA,sha256=KUYpVlwytyMmQBuby0Jf1WaklYdc2GPddiMAqyGKzsM,11810
- heavyball-0.18.7.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
- heavyball-0.18.7.dist-info/top_level.txt,sha256=SzCxSVg_qCUPA4kZObW3Zyo4v-d_mMOD-p7a-WXTl2E,10
- heavyball-0.18.7.dist-info/RECORD,,
+ heavyball/utils.py,sha256=BWscCHlGOw1_zfKYxNAAmfFeOXVpSJHuvqqlfL5A7_0,31690
+ heavyball-0.18.8.dist-info/LICENSE,sha256=CGdGJim64YifGmUVPaeyRsxkvyExtClswhRNIp8FY_U,1322
+ heavyball-0.18.8.dist-info/METADATA,sha256=lwSm2CcImS8GyuowrzKAzxEAU6EU94ixwMW7UF5JZbQ,11810
+ heavyball-0.18.8.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+ heavyball-0.18.8.dist-info/top_level.txt,sha256=SzCxSVg_qCUPA4kZObW3Zyo4v-d_mMOD-p7a-WXTl2E,10
+ heavyball-0.18.8.dist-info/RECORD,,
+ heavyball-0.18.8.dist-info/RECORD,,