adv-optm 2.4.dev8__tar.gz → 2.4.dev10__tar.gz
This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/PKG-INFO +1 -1
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/__init__.py +1 -1
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/optim/AdaMuon_adv.py +1 -1
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/optim/AdamW_adv.py +1 -1
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/optim/Adopt_adv.py +1 -1
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/optim/Lion_adv.py +1 -2
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/optim/Muon_adv.py +1 -1
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/optim/SGD_adv.py +1 -1
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/optim/SignSGD_adv.py +1 -2
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/optim/Simplified_AdEMAMix.py +1 -1
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm.egg-info/PKG-INFO +1 -1
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/setup.py +1 -1
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/LICENSE +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/README.md +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/optim/Lion_Prodigy_adv.py +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/optim/Prodigy_adv.py +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/optim/__init__.py +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/util/Kourkoutas.py +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/util/Muon_AuxAdam.py +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/util/Muon_util.py +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/util/OrthoGrad.py +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/util/__init__.py +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/util/centered_decay.py +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/util/factorization_util.py +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/util/lion_k.py +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/util/param_update.py +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/util/scaled_optm.py +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/util/signed_util.py +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/util/sinkhorn.py +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/util/state_util.py +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm/util/update_util.py +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm.egg-info/SOURCES.txt +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm.egg-info/dependency_links.txt +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm.egg-info/requires.txt +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/adv_optm.egg-info/top_level.txt +0 -0
- {adv_optm-2.4.dev8 → adv_optm-2.4.dev10}/setup.cfg +0 -0
|
@@ -392,7 +392,7 @@ class AdaMuon_adv(torch.optim.Optimizer):
|
|
|
392
392
|
|
|
393
393
|
# Spectral Normalization
|
|
394
394
|
if group.get('spectral_normalization', False):
|
|
395
|
-
init_spectral_norm(
|
|
395
|
+
init_spectral_norm(state, p)
|
|
396
396
|
|
|
397
397
|
# MARS-M state initialization
|
|
398
398
|
if group.get('approx_mars', False):
|
|
@@ -297,7 +297,7 @@ class AdamW_adv(torch.optim.Optimizer):
|
|
|
297
297
|
init_state_tensor(state, 'exp_avg_sq', p.shape, actual_precision, p.device, dtype, non_neg=True)
|
|
298
298
|
|
|
299
299
|
if group.get('spectral_normalization', False) and is_spectral(p):
|
|
300
|
-
init_spectral_norm(
|
|
300
|
+
init_spectral_norm(state, p)
|
|
301
301
|
|
|
302
302
|
_init_anchor(p, state, group)
|
|
303
303
|
|
|
@@ -8,7 +8,6 @@ from ..util.factorization_util import _get_effective_shape, _reconstruct_state,
|
|
|
8
8
|
from ..util.lion_k import _get_lion_k_update
|
|
9
9
|
from ..util.scaled_optm import scale_update, is_spectral, init_spectral_norm
|
|
10
10
|
from ..util.centered_decay import _init_anchor
|
|
11
|
-
from ..util.update_util import _get_l1_adaptive_lr
|
|
12
11
|
from ..util.signed_util import apply_stochastic_sign
|
|
13
12
|
|
|
14
13
|
|
|
@@ -199,7 +198,7 @@ class Lion_adv(torch.optim.Optimizer):
|
|
|
199
198
|
state['prev_sign'] = (grad > 0).to(torch.uint8)
|
|
200
199
|
|
|
201
200
|
if group.get('spectral_normalization', False) and is_spectral(p):
|
|
202
|
-
init_spectral_norm(
|
|
201
|
+
init_spectral_norm(state, p)
|
|
203
202
|
|
|
204
203
|
_init_anchor(p, state, group)
|
|
205
204
|
|
|
@@ -338,7 +338,7 @@ class Muon_adv(torch.optim.Optimizer):
|
|
|
338
338
|
|
|
339
339
|
# Spectral Normalization
|
|
340
340
|
if group.get('spectral_normalization', False):
|
|
341
|
-
init_spectral_norm(
|
|
341
|
+
init_spectral_norm(state, p)
|
|
342
342
|
|
|
343
343
|
# MARS-M state initialization
|
|
344
344
|
if group.get('approx_mars', False):
|
|
@@ -176,7 +176,7 @@ class SGD_adv(torch.optim.Optimizer):
|
|
|
176
176
|
init_state_tensor(state, 'momentum_buffer', p.shape, actual_precision, p.device, dtype)
|
|
177
177
|
|
|
178
178
|
if group.get('spectral_normalization', False) and is_spectral(p):
|
|
179
|
-
init_spectral_norm(
|
|
179
|
+
init_spectral_norm(state, p)
|
|
180
180
|
|
|
181
181
|
_init_anchor(p, state, group)
|
|
182
182
|
|
|
@@ -6,7 +6,6 @@ from ..util import param_update
|
|
|
6
6
|
from ..util.OrthoGrad import _orthogonalize_gradient
|
|
7
7
|
from ..util.factorization_util import _get_effective_shape, _reconstruct_state, _factorize_state, _pack_bools, _unpack_bools
|
|
8
8
|
from ..util.lion_k import _get_lion_k_update
|
|
9
|
-
from ..util.update_util import _get_l1_adaptive_lr
|
|
10
9
|
from ..util.scaled_optm import scale_update, is_spectral, init_spectral_norm
|
|
11
10
|
from ..util.centered_decay import _init_anchor
|
|
12
11
|
from ..util.signed_util import apply_stochastic_sign
|
|
@@ -230,7 +229,7 @@ class SignSGD_adv(torch.optim.Optimizer):
|
|
|
230
229
|
state['sign'] = torch.zeros((d1, packed_d2), dtype=torch.uint8, device=p.device)
|
|
231
230
|
|
|
232
231
|
if group.get('spectral_normalization', False) and is_spectral(p):
|
|
233
|
-
init_spectral_norm(
|
|
232
|
+
init_spectral_norm(state, p)
|
|
234
233
|
|
|
235
234
|
if group.get("l1_adaptive", False):
|
|
236
235
|
state["step"] = 0
|
|
@@ -244,7 +244,7 @@ class Simplified_AdEMAMix(torch.optim.Optimizer):
|
|
|
244
244
|
state['den_sum'] = 1.0
|
|
245
245
|
|
|
246
246
|
if group.get('spectral_normalization', False) and is_spectral(p):
|
|
247
|
-
init_spectral_norm(
|
|
247
|
+
init_spectral_norm(state, p)
|
|
248
248
|
|
|
249
249
|
_init_anchor(p, state, group)
|
|
250
250
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|