heavyball 1.4.3__py3-none-any.whl → 1.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
heavyball/utils.py CHANGED
@@ -482,7 +482,7 @@ def scalar_guard(*args):
     out = []
     for x in xs:
         if isinstance(x, float):
-            out.append(torch.empty((), dtype=torch.float32, device=ref.device).fill_(x))
+            out.append(torch.empty((), dtype=promote(ref.dtype), device=ref.device).fill_(x))
         elif isinstance(x, int):
            out.append(torch.empty((), dtype=torch.int64, device=ref.device).fill_(x))
         else:
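
In 1.4.3, `scalar_guard` always wrapped Python floats as float32 scalars; 1.4.4 uses the promoted dtype of the reference tensor instead, so the wrapped scalar matches the compute precision of the parameter it accompanies. A minimal sketch of the effect, using a hypothetical `_promote` stand-in for heavyball's `promote` (assumed here to upcast half-precision floats to float32 and leave wider dtypes alone):

```python
import torch


def _promote(dtype: torch.dtype) -> torch.dtype:
    # Hypothetical stand-in for heavyball's promote(): upcast half-precision
    # floats to float32, keep float32/float64 as-is.
    return torch.float32 if dtype in (torch.float16, torch.bfloat16) else dtype


ref = torch.zeros(4, dtype=torch.float64)
# 1.4.3: always float32.  1.4.4: follows promote(ref.dtype), float64 here.
lr = torch.empty((), dtype=_promote(ref.dtype), device=ref.device).fill_(1e-3)
print(lr.dtype)  # torch.float64 under this stand-in
```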
@@ -1043,7 +1043,8 @@ def psgd_calc_A_and_conjB(exprA, G, Q):
         if q.dim() <= 1:
             conjB /= q
         else:
-            conjB = torch.linalg.solve_triangular(q, conjB.reshape(-1, q.size(0)), upper=True, left=False).reshape_as(conjB)
+            conjB = torch.linalg.solve_triangular(q, conjB.reshape(-1, q.size(0)), upper=True, left=False).reshape_as(
+                conjB)
         if i < order - 1:
             conjB = torch.transpose(conjB, i, order - 1)
     return A, conjB
@@ -1286,7 +1287,6 @@ def _compilable_fused_precond_grad_cached_(expr: str, ea: Tensor, param, lr, gra
 
 
 def fused_precond_grad_cached_(expr: str, ea: Tensor, param, lr, grad, decay, caution, *cached_q: Tensor):
-
     lr = scalar_guard(lr, param[0])
     _compilable_fused_precond_grad_cached_(expr, ea, param, lr, grad, decay, caution, *cached_q)
 
@@ -1338,25 +1338,20 @@ def caution(g, update):
     return _compilable_cautioning(g, update)
 
 
-def precond_update_prob_schedule(max_prob=1.0, min_prob=0.03, decay=0.001, flat_start=500):
+def precond_update_prob_schedule(max_prob=1.0, min_prob=0.03, decay=0.999, flat_start=1000):
     """Anneal preconditioner update probability during beginning of training.
 
     PSGD benefits from more preconditioner updates at the beginning of training,
     but once the preconditioner is learned the update probability can drop low.
 
     This schedule is an exponential anneal with a flat start. Default settings keep
-    update probability at 1.0 for 200 steps then exponentially anneal down to
-    `min_prob` by 4000 steps. Default settings work very well for most models and
+    update probability at `max_prob` for 1000 steps then exponentially anneal down to
+    `min_prob` by ~4000 steps. Default settings work very well for most models and
     training regimes.
     """
 
     def _schedule(n):
-        if n < flat_start:  # higher numerical stability
-            return max_prob
-
-        n -= flat_start
-        prob = max_prob * math.exp(-decay * (n - flat_start))
-        return max(min_prob, min(max_prob, prob))
+        return max(min_prob, max_prob * decay ** max(n - flat_start, 0))
 
     return _schedule
 
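
The replacement `_schedule` is a single closed-form expression with a multiplicative per-step `decay` (the removed version combined `math.exp` with an additive `decay` and subtracted `flat_start` twice). A standalone sketch of the new schedule with the 1.4.4 defaults, restated here for illustration; the printed values are approximate:

```python
def precond_update_prob_schedule(max_prob=1.0, min_prob=0.03, decay=0.999, flat_start=1000):
    # Flat at max_prob for flat_start steps, then geometric decay, floored at min_prob.
    def _schedule(n):
        return max(min_prob, max_prob * decay ** max(n - flat_start, 0))

    return _schedule


schedule = precond_update_prob_schedule()
print(schedule(500))             # 1.0 -- still in the flat phase
print(round(schedule(4000), 3))  # ~0.05: 0.999 ** 3000 is roughly exp(-3)
print(schedule(10000))           # 0.03 -- the min_prob floor is reached around step ~4500
```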
@@ -1375,12 +1370,18 @@ def merge_group(group, *tensors):
 
 
 def hook_optimizer_into_model(model, optimizer, *args, **kwargs):
-    def _step(p: Tensor, o: torch.optim.Optimizer):
+    optimizers = {}
+
+    def _step(p: Tensor):
+        o = optimizers[p]
         o.step()
         o.zero_grad()
 
     for p in model.parameters():
-        p.register_post_accumulate_grad_hook(functools.partial(_step, o=optimizer([p], *args, **kwargs)))
+        optimizers[p] = optimizer([p], *args, **kwargs)
+        p.register_post_accumulate_grad_hook(_step)
+
+    return optimizers
 
 
 def fused_hook(parameters, optimizer, *args, **kwargs):
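
`hook_optimizer_into_model` now stores one optimizer per parameter in a dict keyed by the parameter tensor and returns that dict, rather than binding each optimizer into its hook with `functools.partial`. A usage sketch under the assumption that any optimizer factory with the `optimizer([p], **kwargs)` calling convention works here; `torch.optim.SGD` is used as a stand-in:

```python
import torch
from torch import nn

from heavyball.utils import hook_optimizer_into_model

model = nn.Linear(8, 2)
# One optimizer per parameter; each steps and zeros its grad as soon as that
# parameter's gradient has been accumulated (post-accumulate-grad hook).
optimizers = hook_optimizer_into_model(model, torch.optim.SGD, lr=1e-2)

loss = model(torch.randn(4, 8)).sum()
loss.backward()  # hooks fire here: per-parameter step() + zero_grad()

# New in 1.4.4: the per-parameter optimizers are handed back, e.g. for checkpointing.
states = [opt.state_dict() for opt in optimizers.values()]
```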
@@ -1401,6 +1402,8 @@ def fused_hook(parameters, optimizer, *args, **kwargs):
     for p in parameters:
         p.register_post_accumulate_grad_hook(_step)
 
+    return o
+
 
 @decorator_knowngood
 def _compilable_caution_no_scale(g: Tensor, update: Tensor):
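
`fused_hook` gets the matching change: the optimizer `o` it builds internally is now returned instead of being reachable only through the registered hooks. A brief sketch, again with `torch.optim.SGD` standing in for the optimizer factory:

```python
import torch
from torch import nn

from heavyball.utils import fused_hook

model = nn.Linear(8, 2)
opt = fused_hook(list(model.parameters()), torch.optim.SGD, lr=1e-2)

model(torch.randn(4, 8)).sum().backward()  # the registered hooks drive the optimizer
checkpoint = opt.state_dict()              # possible now that the optimizer is returned
```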
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: heavyball
-Version: 1.4.3
+Version: 1.4.4
 Summary: Efficient optimizers
 Home-page: https://github.com/clashluke/heavyball
 Author: Lucas Nestler
@@ -0,0 +1,8 @@
+heavyball/__init__.py,sha256=miRgcXlzLWTNzojeRF5hEcg-x_GqfMHjRzOaiR_zO3U,10981
+heavyball/chainable.py,sha256=-5ovRa7yD7V41_cgaBJtO5fBrnBemAILl4YKjQmeuns,24183
+heavyball/utils.py,sha256=lFwN8T-dlldmOe-Qd6iWhSqqNfWl7IBawLWAo5l9rPw,48071
+heavyball-1.4.4.dist-info/LICENSE,sha256=CGdGJim64YifGmUVPaeyRsxkvyExtClswhRNIp8FY_U,1322
+heavyball-1.4.4.dist-info/METADATA,sha256=w5nAamE6sr08elqo2fS6B_kXktOMXxFQvyJTkRT4Eqo,43584
+heavyball-1.4.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+heavyball-1.4.4.dist-info/top_level.txt,sha256=SzCxSVg_qCUPA4kZObW3Zyo4v-d_mMOD-p7a-WXTl2E,10
+heavyball-1.4.4.dist-info/RECORD,,
@@ -1,8 +0,0 @@
-heavyball/__init__.py,sha256=miRgcXlzLWTNzojeRF5hEcg-x_GqfMHjRzOaiR_zO3U,10981
-heavyball/chainable.py,sha256=-5ovRa7yD7V41_cgaBJtO5fBrnBemAILl4YKjQmeuns,24183
-heavyball/utils.py,sha256=x0rSU8lko7ACdI9GuTLC0wP6HwIZxwB8f8tukBOR0xA,48129
-heavyball-1.4.3.dist-info/LICENSE,sha256=CGdGJim64YifGmUVPaeyRsxkvyExtClswhRNIp8FY_U,1322
-heavyball-1.4.3.dist-info/METADATA,sha256=RM_pOme3dsQL-drKcKD6FJ0qE3SSh4JdPM-kC9vpbeU,43584
-heavyball-1.4.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-heavyball-1.4.3.dist-info/top_level.txt,sha256=SzCxSVg_qCUPA4kZObW3Zyo4v-d_mMOD-p7a-WXTl2E,10
-heavyball-1.4.3.dist-info/RECORD,,