heavyball 2.1.0__tar.gz → 2.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {heavyball-2.1.0 → heavyball-2.1.1}/PKG-INFO +1 -1
- {heavyball-2.1.0 → heavyball-2.1.1}/heavyball/utils.py +19 -2
- {heavyball-2.1.0 → heavyball-2.1.1}/heavyball.egg-info/PKG-INFO +1 -1
- {heavyball-2.1.0 → heavyball-2.1.1}/pyproject.toml +1 -1
- {heavyball-2.1.0 → heavyball-2.1.1}/LICENSE +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/README.md +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/heavyball/__init__.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/heavyball/chainable.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/heavyball/helpers.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/heavyball.egg-info/SOURCES.txt +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/heavyball.egg-info/dependency_links.txt +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/heavyball.egg-info/requires.txt +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/heavyball.egg-info/top_level.txt +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/setup.cfg +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_bf16_params.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_bf16_q.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_bf16_storage.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_caution.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_channels_last.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_clip.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_closure.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_ema.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_foreach.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_hook.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_mars.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_memory.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_memory_leak.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_merge.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_migrate_cli.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_nd_param.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_no_grad.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_psgd_precond_init_stability.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_save_restore.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_singular_values.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_soap.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_stochastic_updates.py +0 -0
- {heavyball-2.1.0 → heavyball-2.1.1}/test/test_toy_training.py +0 -0
@@ -394,7 +394,8 @@ def zeropower_via_newtonschulz5(G, steps=5, eps=1e-7):
|
|
394
394
|
if G.size(-2) > G.size(-1):
|
395
395
|
X = X.mT
|
396
396
|
|
397
|
-
|
397
|
+
# X = X / (X.norm(dim=(-2, -1), keepdim=True) + eps)
|
398
|
+
stochastic_divide_with_eps_(X, G.norm(dim=(-2, -1)), eps) # ensure top singular value <= 1
|
398
399
|
# Perform the NS iterations
|
399
400
|
for a, b, c in [
|
400
401
|
(4.0848, -6.8946, 2.9270),
|
@@ -418,7 +419,9 @@ def legacy_zeropower_via_newtonschulz5(G, steps=5, eps=1e-7):
|
|
418
419
|
a, b, c = (3.4445, -4.7750, 2.0315)
|
419
420
|
G = G.clone()
|
420
421
|
X = G if G.dtype == torch.float64 else stochastic_round_(G)
|
421
|
-
|
422
|
+
|
423
|
+
# X = X / (X.norm(dim=(-2, -1), keepdim=True) + eps)
|
424
|
+
stochastic_divide_with_eps_(X, G.norm(dim=(-2, -1)), eps) # ensure top singular value <= 1
|
422
425
|
if G.size(0) > G.size(1):
|
423
426
|
X = X.T
|
424
427
|
for _ in range(steps):
|
@@ -755,6 +758,20 @@ def stochastic_multiply_(x: List[Tensor] | Tensor, y: List[Tensor] | Tensor):
|
|
755
758
|
_compilable_stochastic_multiply_(x, y)
|
756
759
|
|
757
760
|
|
761
|
+
@decorator_knowngood
|
762
|
+
def _compilable_stochastic_divide_with_eps_(x: List[Tensor], y: List[Tensor], eps: Tensor):
|
763
|
+
for x_, y_ in zip(x, y):
|
764
|
+
x32 = promote(x_)
|
765
|
+
y32 = promote(y_)
|
766
|
+
copy_stochastic_(x_, x32 / (y32 + eps))
|
767
|
+
|
768
|
+
|
769
|
+
def stochastic_divide_with_eps_(x: List[Tensor] | Tensor, y: List[Tensor] | Tensor, eps: float):
|
770
|
+
x, y = broadcastable_list_guard(x, y)
|
771
|
+
eps = scalar_guard(eps, y[0])
|
772
|
+
_compilable_stochastic_divide_with_eps_(x, y, eps)
|
773
|
+
|
774
|
+
|
758
775
|
@decorator
|
759
776
|
def update_ggt(grad, GG, max_precond_dim, precondition_1d, beta):
|
760
777
|
"""
|
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
|
|
5
5
|
[project]
|
6
6
|
name = "heavyball"
|
7
7
|
description = "Efficient Optimizers"
|
8
|
-
version = "2.1.0"
|
8
|
+
version = "2.1.1"
|
9
9
|
authors = [{ name = "HeavyBall Authors", email = "github.heavyball@nestler.sh" }]
|
10
10
|
classifiers = ["Intended Audience :: Developers",
|
11
11
|
"Intended Audience :: Science/Research",
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|