adv-optm 1.1.0.dev2__py3-none-any.whl → 1.1.0.dev3__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of adv-optm might be problematic. Click here for more details.

adv_optm/__init__.py CHANGED
@@ -16,4 +16,4 @@ __all__ = [
16
16
  "Lion_Prodigy_adv",
17
17
  ]
18
18
 
19
- __version__ = "1.1.0.dev2"
19
+ __version__ = "1.1.0.dev3"
adv_optm/optim/Prodigy_adv.py CHANGED
@@ -228,7 +228,7 @@ class Prodigy_adv(torch.optim.Optimizer):
228
228
  self.beta3 = g_group['beta3']
229
229
  if self.beta3 is None:
230
230
  self.beta3 = math.sqrt(self.beta2_default)
231
-
231
+
232
232
  self.d = g_group['d']
233
233
  lr = g_group['lr']
234
234
 
@@ -306,8 +306,10 @@ class Prodigy_adv(torch.optim.Optimizer):
306
306
  self.kourkoutas_helper.accumulate_gradient_sq_norm(p, grad)
307
307
  # Get the dynamic beta2 calculated in prepare_step()
308
308
  beta2 = self.kourkoutas_helper.get_beta2(p, group, current_step)
309
+ beta3 = math.sqrt(beta2)
309
310
  else:
310
311
  beta2 = self.beta2_default
312
+ beta3 = self.beta3
311
313
 
312
314
  if self.use_AdEMAMix:
313
315
  beta3_ema = group['beta3_ema']
@@ -444,7 +446,7 @@ class Prodigy_adv(torch.optim.Optimizer):
444
446
  self.d_numerator += (self.d / d0) * self.dlr * torch.dot(grad_flat[::slice_p], p0.data - p_flat[::slice_p]).item()
445
447
 
446
448
  alpha = ((self.d / d0) * self.d) if safeguard_warmup else ((self.d / d0) * self.dlr)
447
- s.mul_(self.beta3).add_(grad_flat[::slice_p], alpha=alpha)
449
+ s.mul_(beta3).add_(grad_flat[::slice_p], alpha=alpha)
448
450
  self.d_denom += s.abs().sum().item()
449
451
 
450
452
  del s, p0, grad_flat, p_flat, alpha
adv_optm/util/Kourkoutas.py CHANGED
@@ -18,6 +18,10 @@ class KourkoutasHelper:
18
18
  self._layer_info_built = False
19
19
  self._current_step_prepared = -1
20
20
 
21
+ # This ensures the map is complete before the first backward pass,
22
+ # making it compatible with fused back pass mechanisms.
23
+ self._build_layer_info_if_needed()
24
+
21
25
  def _build_layer_info_if_needed(self):
22
26
  """Builds a map of layers and the parameters they contain."""
23
27
  if self._layer_info_built:
@@ -29,7 +33,7 @@ class KourkoutasHelper:
29
33
 
30
34
  for group in self.optimizer.param_groups:
31
35
  for p in group['params']:
32
- if p.grad is None: continue
36
+ # The mapping is static and should not depend on the presence of a gradient.
33
37
  layer_key = self.optimizer.layer_key_fn(p)
34
38
  if layer_key not in self.layer_info:
35
39
  self.layer_info[layer_key] = {'params': [], 'group_ref': group}
@@ -46,7 +50,6 @@ class KourkoutasHelper:
46
50
  Calculates dynamic beta2 for all layers using the completed scalar accumulators
47
51
  from the PREVIOUS step. Should be called once at the start of an optimizer step.
48
52
  """
49
- self._build_layer_info_if_needed()
50
53
 
51
54
  # Check if logging is enabled for this step based on the interval
52
55
  k_logging_interval = self.optimizer.param_groups[0].get('k_logging', 0)
@@ -113,7 +116,6 @@ class KourkoutasHelper:
113
116
  """
114
117
  Accumulates the squared L2 norm of a single gradient for the next step's calculation.
115
118
  """
116
- self._build_layer_info_if_needed()
117
119
  layer_key = self.optimizer.layer_key_fn(p)
118
120
 
119
121
  if layer_key in self.layer_info:
adv_optm-1.1.0.dev2.dist-info/METADATA → adv_optm-1.1.0.dev3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: adv_optm
3
- Version: 1.1.0.dev2
3
+ Version: 1.1.0.dev3
4
4
  Summary: A family of highly efficient, lightweight yet powerful optimizers.
5
5
  Home-page: https://github.com/Koratahiu/Advanced_Optimizers
6
6
  Author: Koratahiu
adv_optm-1.1.0.dev2.dist-info/RECORD → adv_optm-1.1.0.dev3.dist-info/RECORD CHANGED
@@ -1,20 +1,20 @@
1
- adv_optm/__init__.py,sha256=hkmbLr1AVDoC6VbnyTkNy-G4g5bmcLFH2Kv4dYWp9uY,311
1
+ adv_optm/__init__.py,sha256=aSPtwpl2S7i_-KYXTDDeKoQlcLjZc6whVUNOINl6TEA,311
2
2
  adv_optm/optim/AdamW_adv.py,sha256=H4XlYZELwiFvXt0A9wMlRNiw9c8rmPMspHDCvR_SZIQ,17487
3
3
  adv_optm/optim/Adopt_adv.py,sha256=PJ3ZaLgzYbvxXDS56FGjzMrVMyHDXSWdUPHnX5NpNAA,21241
4
4
  adv_optm/optim/Lion_Prodigy_adv.py,sha256=sGzhts9a6gHfCkuHTB5L9IrClo4c6UThzYYErBwqOaA,12844
5
5
  adv_optm/optim/Lion_adv.py,sha256=6G1CukJB_pC7l9HwFEuY1ydsNHZFabVmOvcHDsHHVuQ,8295
6
- adv_optm/optim/Prodigy_adv.py,sha256=-eMTutexbGrUQtSehKaOo6BO_p3QySpSIMgJKWvbxog,25517
6
+ adv_optm/optim/Prodigy_adv.py,sha256=EeSfYu8IIeZX1Dk8MlD71vGOpMadtnW2iMhHxPDL2XQ,25574
7
7
  adv_optm/optim/Simplified_AdEMAMix.py,sha256=b4GaSI-TX6wFBqGxZeoJPbf2nVRCEtB3WVb1olDgY14,12980
8
8
  adv_optm/optim/__init__.py,sha256=pcP865H2j1tut2VfTUhzQh7V8TF_tzPjqFnjMfFed2k,382
9
9
  adv_optm/util/BF16_Stochastic_Rounding.py,sha256=Q5H0BcogmE4atP65dLoI21HKSf50lRdsBDfeF6v9Tbg,1548
10
10
  adv_optm/util/Effective_Shape.py,sha256=TBvIk1V8IuTbbBsxuekJA4e_v8JlR5Nujtut8RTWAm4,318
11
- adv_optm/util/Kourkoutas.py,sha256=6OzK96KJ7Dd9Py8hiGWszF9C_n4uVoDjFCA_EYbhL4c,6600
11
+ adv_optm/util/Kourkoutas.py,sha256=UN_EAbG-9p98Qp2c_vSUy1Gw1K55SQ_e0TmnNBb-OFQ,6748
12
12
  adv_optm/util/NNMF.py,sha256=yRf5IP5Sjq0Uf0DxN0Q8NxEGSdD-f1ULziLVDOjY8K4,639
13
13
  adv_optm/util/One_Bit_Boolean.py,sha256=Wat49esdwohuN-OHOFMW8D0aOQgV9cP5Rl8z6yfmpos,1068
14
14
  adv_optm/util/OrthoGrad.py,sha256=NzInuBQGy_Ja__M1R9XbvqVaQ0fhGbtGgFE9YON7B3I,707
15
15
  adv_optm/util/__init__.py,sha256=qoyIF0jcLjs_vSEcsv36clw5LFNBEbifyXrrVxMH-G4,349
16
- adv_optm-1.1.0.dev2.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
17
- adv_optm-1.1.0.dev2.dist-info/METADATA,sha256=Y2F2wkpPmdbRtHft1KdCm1D6feTmiP5kFJ6iYpSLwCo,8427
18
- adv_optm-1.1.0.dev2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
19
- adv_optm-1.1.0.dev2.dist-info/top_level.txt,sha256=iNfBIIzu-lPrQ7jyC56WBCcbkRwitM2nJ15-MRQ_6fg,9
20
- adv_optm-1.1.0.dev2.dist-info/RECORD,,
16
+ adv_optm-1.1.0.dev3.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
17
+ adv_optm-1.1.0.dev3.dist-info/METADATA,sha256=03sDh1nQ1CQXxu4TbRnRblX1IZ9S-Eka7hP1LNs54WA,8427
18
+ adv_optm-1.1.0.dev3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
19
+ adv_optm-1.1.0.dev3.dist-info/top_level.txt,sha256=iNfBIIzu-lPrQ7jyC56WBCcbkRwitM2nJ15-MRQ_6fg,9
20
+ adv_optm-1.1.0.dev3.dist-info/RECORD,,