adv-optm 2.4.dev9__tar.gz → 2.4.dev11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/PKG-INFO +1 -1
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/__init__.py +1 -1
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/optim/AdaMuon_adv.py +1 -1
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/optim/AdamW_adv.py +1 -1
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/optim/Adopt_adv.py +1 -1
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/optim/Lion_adv.py +1 -1
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/optim/Muon_adv.py +1 -1
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/optim/SGD_adv.py +2 -2
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/optim/SignSGD_adv.py +1 -1
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/optim/Simplified_AdEMAMix.py +1 -1
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm.egg-info/PKG-INFO +1 -1
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/setup.py +1 -1
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/LICENSE +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/README.md +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/optim/Lion_Prodigy_adv.py +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/optim/Prodigy_adv.py +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/optim/__init__.py +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/util/Kourkoutas.py +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/util/Muon_AuxAdam.py +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/util/Muon_util.py +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/util/OrthoGrad.py +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/util/__init__.py +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/util/centered_decay.py +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/util/factorization_util.py +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/util/lion_k.py +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/util/param_update.py +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/util/scaled_optm.py +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/util/signed_util.py +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/util/sinkhorn.py +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/util/state_util.py +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm/util/update_util.py +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm.egg-info/SOURCES.txt +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm.egg-info/dependency_links.txt +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm.egg-info/requires.txt +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/adv_optm.egg-info/top_level.txt +0 -0
- {adv_optm-2.4.dev9 → adv_optm-2.4.dev11}/setup.cfg +0 -0
|
@@ -392,7 +392,7 @@ class AdaMuon_adv(torch.optim.Optimizer):
|
|
|
392
392
|
|
|
393
393
|
# Spectral Normalization
|
|
394
394
|
if group.get('spectral_normalization', False):
|
|
395
|
-
init_spectral_norm(
|
|
395
|
+
init_spectral_norm(state, p)
|
|
396
396
|
|
|
397
397
|
# MARS-M state initialization
|
|
398
398
|
if group.get('approx_mars', False):
|
|
@@ -297,7 +297,7 @@ class AdamW_adv(torch.optim.Optimizer):
|
|
|
297
297
|
init_state_tensor(state, 'exp_avg_sq', p.shape, actual_precision, p.device, dtype, non_neg=True)
|
|
298
298
|
|
|
299
299
|
if group.get('spectral_normalization', False) and is_spectral(p):
|
|
300
|
-
init_spectral_norm(
|
|
300
|
+
init_spectral_norm(state, p)
|
|
301
301
|
|
|
302
302
|
_init_anchor(p, state, group)
|
|
303
303
|
|
|
@@ -198,7 +198,7 @@ class Lion_adv(torch.optim.Optimizer):
|
|
|
198
198
|
state['prev_sign'] = (grad > 0).to(torch.uint8)
|
|
199
199
|
|
|
200
200
|
if group.get('spectral_normalization', False) and is_spectral(p):
|
|
201
|
-
init_spectral_norm(
|
|
201
|
+
init_spectral_norm(state, p)
|
|
202
202
|
|
|
203
203
|
_init_anchor(p, state, group)
|
|
204
204
|
|
|
@@ -338,7 +338,7 @@ class Muon_adv(torch.optim.Optimizer):
|
|
|
338
338
|
|
|
339
339
|
# Spectral Normalization
|
|
340
340
|
if group.get('spectral_normalization', False):
|
|
341
|
-
init_spectral_norm(
|
|
341
|
+
init_spectral_norm(state, p)
|
|
342
342
|
|
|
343
343
|
# MARS-M state initialization
|
|
344
344
|
if group.get('approx_mars', False):
|
|
@@ -176,7 +176,7 @@ class SGD_adv(torch.optim.Optimizer):
|
|
|
176
176
|
init_state_tensor(state, 'momentum_buffer', p.shape, actual_precision, p.device, dtype)
|
|
177
177
|
|
|
178
178
|
if group.get('spectral_normalization', False) and is_spectral(p):
|
|
179
|
-
init_spectral_norm(
|
|
179
|
+
init_spectral_norm(state, p)
|
|
180
180
|
|
|
181
181
|
_init_anchor(p, state, group)
|
|
182
182
|
|
|
@@ -255,7 +255,7 @@ class SGD_adv(torch.optim.Optimizer):
|
|
|
255
255
|
del random_int_state_tensor
|
|
256
256
|
|
|
257
257
|
if group['sinkhorn']:
|
|
258
|
-
update = apply_sr_sinkhorn(update, iters=group['
|
|
258
|
+
update = apply_sr_sinkhorn(update, iters=group['sinkhorn_iterations'])
|
|
259
259
|
|
|
260
260
|
update_scaling = step_size
|
|
261
261
|
if group.get('spectral_normalization', False):
|
|
@@ -229,7 +229,7 @@ class SignSGD_adv(torch.optim.Optimizer):
|
|
|
229
229
|
state['sign'] = torch.zeros((d1, packed_d2), dtype=torch.uint8, device=p.device)
|
|
230
230
|
|
|
231
231
|
if group.get('spectral_normalization', False) and is_spectral(p):
|
|
232
|
-
init_spectral_norm(
|
|
232
|
+
init_spectral_norm(state, p)
|
|
233
233
|
|
|
234
234
|
if group.get("l1_adaptive", False):
|
|
235
235
|
state["step"] = 0
|
|
@@ -244,7 +244,7 @@ class Simplified_AdEMAMix(torch.optim.Optimizer):
|
|
|
244
244
|
state['den_sum'] = 1.0
|
|
245
245
|
|
|
246
246
|
if group.get('spectral_normalization', False) and is_spectral(p):
|
|
247
|
-
init_spectral_norm(
|
|
247
|
+
init_spectral_norm(state, p)
|
|
248
248
|
|
|
249
249
|
_init_anchor(p, state, group)
|
|
250
250
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|