PyPI - heavyball - Versions diffs - 2.1.1__py3-none-any.whl → 2.1.3__py3-none-any.whl - Mend

heavyball 2.1.1__py3-none-any.whl → 2.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

heavyball/helpers.py CHANGED Viewed

@@ -3,7 +3,8 @@ from __future__ import annotations
 import functools
 import math
 import threading
-from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union
+from contextlib import contextmanager
+from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Sequence, Tuple, Union
 import numpy
 import numpy as np
@@ -11,7 +12,6 @@ import optuna
 import optunahub
 import pandas as pd
 import torch
-from botorch.utils.sampling import manual_seed
 from hebo.design_space.design_space import DesignSpace
 from hebo.optimizers.hebo import HEBO
 from optuna._transform import _SearchSpaceTransform
@@ -21,13 +21,6 @@ from optuna.samplers._lazy_random_state import LazyRandomState
 from optuna.study import Study
 from optuna.study._study_direction import StudyDirection
 from optuna.trial import FrozenTrial, TrialState
-from optuna_integration.botorch import (
-    ehvi_candidates_func,
-    logei_candidates_func,
-    qehvi_candidates_func,
-    qei_candidates_func,
-    qparego_candidates_func,
-)
 from torch import Tensor
 from torch.nn import functional as F
@@ -37,6 +30,33 @@ _MAXINT32 = (1 << 31) - 1
 _SAMPLER_KEY = "auto:sampler"
+@contextmanager
+def manual_seed(seed: int | None = None) -> Generator[None, None, None]:
+    r"""
+    Contextmanager for manual setting the torch.random seed.
+    Args:
+        seed: The seed to set the random number generator to.
+    Returns:
+        Generator
+    Example:
+        >>> with manual_seed(1234):
+        >>>     X = torch.rand(3)
+    copied as-is from https://github.com/meta-pytorch/botorch/blob/a42cd65f9b704cdb6f2ee64db99a022eb15295d5/botorch/utils/sampling.py#L53C1-L75C50 under the MIT License
+    """
+    old_state = torch.random.get_rng_state()
+    try:
+        if seed is not None:
+            torch.random.manual_seed(seed)
+        yield
+    finally:
+        if seed is not None:
+            torch.random.set_rng_state(old_state)
 class SimpleAPIBaseSampler(BaseSampler):
     def __init__(
         self,
@@ -65,6 +85,16 @@ def _get_default_candidates_func(
     """
     The original is available at https://github.com/optuna/optuna-integration/blob/156a8bc081322791015d2beefff9373ed7b24047/optuna_integration/botorch/botorch.py under the MIT License
     """
+    # lazy import
+    from optuna_integration.botorch import (
+        ehvi_candidates_func,
+        logei_candidates_func,
+        qehvi_candidates_func,
+        qei_candidates_func,
+        qparego_candidates_func,
+    )
     if n_objectives > 3 and not has_constraint and not consider_running_trials:
         return ehvi_candidates_func
     elif n_objectives > 3:

heavyball/utils.py CHANGED Viewed

@@ -27,6 +27,7 @@ class ZerothPowerMode(enum.Enum):
     qr = "qr"
     svd = "svd"
     legacy_svd = "legacy_svd"
+    thinky_polar_express = "thinky_polar_express"
 class OrthoScaleMode(enum.Enum):
@@ -46,7 +47,7 @@ _cudnn_double_backward_pattern = re.compile(
 )
 _torch_compile_double_backward_pattern = re.compile(r"compile.*does not currently support double backward")
 _fd_error = (
-    "You can accelerate startup by globally enabling finite_differences first "  #
+    "You can accelerate startup by globally enabling finite_differences first "
     "(via opt.finite_differences=True or by subclassing it)\n"
     "Original Error: "
 )
@@ -390,12 +391,12 @@ def zeropower_via_newtonschulz5(G, steps=5, eps=1e-7):
     )  # batched Muon implementation by @scottjmaddox, and put into practice in the record by @YouJiacheng
     assert steps == 5
     G = G.clone()
-    X = G if G.dtype == torch.float64 else stochastic_round_(G)
+    x = G if G.dtype == torch.float64 else stochastic_round_(G)
     if G.size(-2) > G.size(-1):
-        X = X.mT
+        x = x.mT
     # X = X / (X.norm(dim=(-2, -1), keepdim=True) + eps)
-    stochastic_divide_with_eps_(X, G.norm(dim=(-2, -1)), eps)  # ensure top singular value <= 1
+    stochastic_divide_with_eps_(x, G.norm(dim=(-2, -1)), eps)  # ensure top singular value <= 1
     # Perform the NS iterations
     for a, b, c in [
         (4.0848, -6.8946, 2.9270),
@@ -404,13 +405,75 @@ def zeropower_via_newtonschulz5(G, steps=5, eps=1e-7):
         (2.8769, -3.1427, 1.2046),
         (2.8366, -3.0525, 1.2012),
     ]:
-        A = X @ X.mT
-        B = b * A + c * A @ A  # adapted from suggestion by @jxbz, @leloykun, and @YouJiacheng
-        X = a * X + B @ X
+        s = x @ x.mT
+        y = c * s
+        y.diagonal(dim1=-2, dim2=-1).add_(b)
+        y = y @ s
+        y.diagonal(dim1=-2, dim2=-1).add_(a)
+        x = y @ x
     if G.size(-2) > G.size(-1):
-        X = X.mT
-    return X.to(G.dtype)
+        x = x.mT
+    return x.to(G.dtype)
+###### START
+# Based on https://arxiv.org/pdf/2505.16932v3
+# and https://github.com/NoahAmsel/PolarExpress/blob/5454910920ca8c65afda28820cdf9e49b9436ed0/polar_express.py#L69-L82
+# and https://github.com/thinking-machines-lab/manifolds/blob/89dcae50f01af59f1e0570289474da3a2ecaa60b/src/msign.py#L47
+#
+# under the MIT License
+# Coefficients are from https://arxiv.org/pdf/2505.16932v3
+ABC_LIST: list[tuple[float, float, float]] = [
+    (8.28721201814563, -23.595886519098837, 17.300387312530933),
+    (4.107059111542203, -2.9478499167379106, 0.5448431082926601),
+    (3.9486908534822946, -2.908902115962949, 0.5518191394370137),
+    (3.3184196573706015, -2.488488024314874, 0.51004894012372),
+    (2.300652019954817, -1.6689039845747493, 0.4188073119525673),
+    (1.891301407787398, -1.2679958271945868, 0.37680408948524835),
+    (1.8750014808534479, -1.2500016453999487, 0.3750001645474248),
+    (1.875, -1.25, 0.375),
+]
+# safety factor for numerical stability (but exclude last polynomial)
+ABC_LIST_STABLE: list[tuple[float, float, float]] = [
+    (a / 1.01, b / 1.01**3, c / 1.01**5) for (a, b, c) in ABC_LIST[:-1]
+] + [ABC_LIST[-1]]
+def msign(G: torch.Tensor, steps: int = 10, eps: float = 1e-7) -> torch.Tensor:
+    """
+    Polar Express algorithm for the matrix sign function:
+    https://arxiv.org/abs/2505.16932
+    """
+    assert G.ndim >= 2
+    should_transpose: bool = G.size(-2) > G.size(-1)
+    x = G if G.dtype == torch.float64 else stochastic_round_(G)
+    if should_transpose:
+        x = x.mT
+    # x = x / (x.norm(dim=(-2, -1), keepdim=True) * 1.01 + eps)
+    stochastic_divide_with_eps_(x, x.norm(dim=(-2, -1)) * 1.01, eps)
+    for step in range(steps):
+        a, b, c = ABC_LIST_STABLE[step] if step < len(ABC_LIST_STABLE) else ABC_LIST_STABLE[-1]
+        s = x @ x.mT
+        # goal is to compute x = a x + b S x + c S^2 x
+        # we can break this up into: x = (a I + (b I + c S) S) x
+        y = c * s
+        y.diagonal(dim1=-2, dim2=-1).add_(b)
+        y = y @ s
+        y.diagonal(dim1=-2, dim2=-1).add_(a)
+        x = y @ x
+    if should_transpose:
+        x = x.mT
+    return x.float()
+###### END
 @decorator_knowngood
@@ -418,19 +481,22 @@ def legacy_zeropower_via_newtonschulz5(G, steps=5, eps=1e-7):
     assert len(G.shape) == 2
     a, b, c = (3.4445, -4.7750, 2.0315)
     G = G.clone()
-    X = G if G.dtype == torch.float64 else stochastic_round_(G)
+    x = G if G.dtype == torch.float64 else stochastic_round_(G)
     # X = X / (X.norm(dim=(-2, -1), keepdim=True) + eps)
-    stochastic_divide_with_eps_(X, G.norm(dim=(-2, -1)), eps)  # ensure top singular value <= 1
+    stochastic_divide_with_eps_(x, G.norm(dim=(-2, -1)), eps)  # ensure top singular value <= 1
     if G.size(0) > G.size(1):
-        X = X.T
+        x = x.T
     for _ in range(steps):
-        A = X @ X.T
-        B = b * A + c * A @ A  # adapted from suggestion by @jxbz, @leloykun, and @YouJiacheng
-        X = a * X + B @ X
+        s = x @ x.mT
+        y = c * s
+        y.diagonal(dim1=-2, dim2=-1).add_(b)
+        y = y @ s
+        y.diagonal(dim1=-2, dim2=-1).add_(a)
+        x = y @ x
     if G.size(0) > G.size(1):
-        X = X.T
-    return X.to(G.dtype)
+        x = x.T
+    return x.to(G.dtype)
 @decorator_knowngood
@@ -492,6 +558,8 @@ def _compilable_orthogonal_(x: Tensor, mode: str | ZerothPowerMode, out: Tensor
         scale_mode = OrthoScaleMode(scale_mode)
     if mode == ZerothPowerMode.newtonschulz or x.shape[0] != x.shape[1]:
         y = zeropower_via_newtonschulz5(x, 5)
+    elif mode == ZerothPowerMode.thinky_polar_express:
+        y = msign(x, 10)
     elif mode == ZerothPowerMode.legacy_newtonschulz:
         y = legacy_zeropower_via_newtonschulz5(x, 5)
     elif mode == ZerothPowerMode.qr:
@@ -1522,7 +1590,7 @@ def _compilable_copy_stochastic_(target: Tensor, source: Tensor):
 def copy_stochastic_(target: Tensor, source: Tensor):
     if target.dtype == torch.bfloat16 and source.dtype in (torch.float16, torch.float32, torch.float64):
-        _compilable_copy_stochastic_(target, source.float())
+        source = stochastic_round_(target, source)
     set_(target, source)
@@ -2349,10 +2417,11 @@ def bf16_matmul(x: Tensor, y: Tensor):
 def if_iscompiling(fn):
     base = getattr(torch, fn.__name__, None)
-    def _fn(x):
-        if torch.compiler.is_compiling() and hasattr(torch, fn.__name__):
-            return base(x)
-        return fn(x)
+    @functools.wraps(fn)
+    def _fn(*args, **kwargs):
+        if torch.compiler.is_compiling() and base is not None:
+            return base(*args, **kwargs)
+        return fn(*args, **kwargs)
     return _fn

{heavyball-2.1.1.dist-info → heavyball-2.1.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: heavyball
-Version: 2.1.1
+Version: 2.1.3
 Summary: Efficient Optimizers
 Author-email: HeavyBall Authors <github.heavyball@nestler.sh>
 Project-URL: source, https://github.com/HomebrewML/HeavyBall
@@ -16,11 +16,12 @@ Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: opt-einsum>=3.4.0
-Requires-Dist: torch>=2.7.0
-Requires-Dist: numpy
+Requires-Dist: torch<3.0,>=2.2
+Requires-Dist: numpy<2.0.0
 Provides-Extra: dev
 Requires-Dist: pre-commit; extra == "dev"
 Requires-Dist: pytest; extra == "dev"
+Requires-Dist: hypothesis; extra == "dev"
 Requires-Dist: ruff; extra == "dev"
 Requires-Dist: matplotlib; extra == "dev"
 Requires-Dist: seaborn; extra == "dev"

heavyball-2.1.3.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,9 @@
+heavyball/__init__.py,sha256=1BTb7G-VcfcMyS4EpuVnhE5DBp2fj_Zzs9EQr6slPzg,30491
+heavyball/chainable.py,sha256=8S-7QRZYiy_ARhQ8uDu5G0Eg3ouT9Vcfk-rxbKlp4zI,42510
+heavyball/helpers.py,sha256=is4Egdgoj2GUsBYdraItonqsoVIY9ZKP_VZl-hEnF1Y,31077
+heavyball/utils.py,sha256=_AOFIkFyaMO39YjbvclkzivR-nKe_kLShRZda3rgMiA,104850
+heavyball-2.1.3.dist-info/licenses/LICENSE,sha256=G9fFZcNIVWjU7o6Pr_4sJBRCNDU5X-zelSxIJ2D48ms,1323
+heavyball-2.1.3.dist-info/METADATA,sha256=by35259YI9DvUQ8Vq958sHmAxqSVtPY5JoY5Hn0CccY,5088
+heavyball-2.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+heavyball-2.1.3.dist-info/top_level.txt,sha256=SzCxSVg_qCUPA4kZObW3Zyo4v-d_mMOD-p7a-WXTl2E,10
+heavyball-2.1.3.dist-info/RECORD,,

heavyball-2.1.1.dist-info/RECORD DELETED Viewed

@@ -1,9 +0,0 @@
-heavyball/__init__.py,sha256=1BTb7G-VcfcMyS4EpuVnhE5DBp2fj_Zzs9EQr6slPzg,30491
-heavyball/chainable.py,sha256=8S-7QRZYiy_ARhQ8uDu5G0Eg3ouT9Vcfk-rxbKlp4zI,42510
-heavyball/helpers.py,sha256=zk_S84wpGcvO9P6kn4UeaQUIDowHxcbM9qQITEm2g5I,30267
-heavyball/utils.py,sha256=zAOlSDqMbSUJEdCfoOcUbRIO94Qg4cxT40IN_UPskQk,102492
-heavyball-2.1.1.dist-info/licenses/LICENSE,sha256=G9fFZcNIVWjU7o6Pr_4sJBRCNDU5X-zelSxIJ2D48ms,1323
-heavyball-2.1.1.dist-info/METADATA,sha256=92i_Q4bxQgRsH8BEOYEuW0Qg43nR5jJLSPGIIJmyzxc,5037
-heavyball-2.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-heavyball-2.1.1.dist-info/top_level.txt,sha256=SzCxSVg_qCUPA4kZObW3Zyo4v-d_mMOD-p7a-WXTl2E,10
-heavyball-2.1.1.dist-info/RECORD,,

{heavyball-2.1.1.dist-info → heavyball-2.1.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{heavyball-2.1.1.dist-info → heavyball-2.1.3.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{heavyball-2.1.1.dist-info → heavyball-2.1.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

heavyball 2.1.1__py3-none-any.whl → 2.1.3__py3-none-any.whl

heavyball 2.1.1__py3-none-any.whl → 2.1.3__py3-none-any.whl