adv-optm 2.4.dev1__tar.gz → 2.4.dev2__tar.gz
This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/PKG-INFO +1 -1
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/__init__.py +1 -1
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/util/scaled_optm.py +4 -2
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm.egg-info/PKG-INFO +1 -1
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/setup.py +1 -1
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/LICENSE +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/README.md +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/optim/AdaMuon_adv.py +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/optim/AdamW_adv.py +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/optim/Adopt_adv.py +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/optim/Lion_Prodigy_adv.py +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/optim/Lion_adv.py +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/optim/Muon_adv.py +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/optim/Prodigy_adv.py +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/optim/SignSGD_adv.py +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/optim/Simplified_AdEMAMix.py +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/optim/__init__.py +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/util/Kourkoutas.py +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/util/Muon_AuxAdam.py +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/util/Muon_util.py +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/util/OrthoGrad.py +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/util/__init__.py +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/util/centered_decay.py +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/util/factorization_util.py +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/util/lion_k.py +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/util/param_update.py +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm/util/update_util.py +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm.egg-info/SOURCES.txt +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm.egg-info/dependency_links.txt +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm.egg-info/requires.txt +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/adv_optm.egg-info/top_level.txt +0 -0
- {adv_optm-2.4.dev1 → adv_optm-2.4.dev2}/setup.cfg +0 -0
|
@@ -51,7 +51,7 @@ def scale_wds(wd: float, cwd: float, p: torch.Tensor) -> tuple[float, float]:
|
|
|
51
51
|
if getattr(p, '_is_dora_scale', False):
|
|
52
52
|
return wd, cwd
|
|
53
53
|
|
|
54
|
-
conflict =
|
|
54
|
+
conflict = cwd != 0
|
|
55
55
|
|
|
56
56
|
if getattr(p, '_is_oft', False):
|
|
57
57
|
# Fallback to standard WD (using cwd value) if both are active.
|
|
@@ -85,7 +85,8 @@ def rms_normalization(update: torch.Tensor, dim: int | None, lr: float) -> torch
|
|
|
85
85
|
"""Performs Root Mean Square normalization on the update tensor."""
|
|
86
86
|
n = update.numel() if dim is None else update.shape[dim]
|
|
87
87
|
norm = torch.linalg.vector_norm(update, ord=2, dim=dim, keepdim=True).clamp_min_(1e-12)
|
|
88
|
-
|
|
88
|
+
scale_n = n**0.5
|
|
89
|
+
return update.mul_(lr * scale_n / norm)
|
|
89
90
|
|
|
90
91
|
|
|
91
92
|
def is_spectral(p: torch.Tensor) -> bool:
|
|
@@ -111,6 +112,7 @@ def spectral_normalization(update: torch.Tensor, vector_state: torch.Tensor, lr:
|
|
|
111
112
|
"""
|
|
112
113
|
d_out = update.shape[0]
|
|
113
114
|
d_in = update.numel() // d_out
|
|
115
|
+
update = update.to(vector_state.dtype)
|
|
114
116
|
update_flat = update.view(d_out, d_in)
|
|
115
117
|
# Target scale derived from the "Modular Norm" paper
|
|
116
118
|
target_scale = (d_out / d_in) ** 0.5
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|