adv_optm-1.2.0-py3-none-any.whl → adv_optm-1.2.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adv_optm/__init__.py +1 -1
- adv_optm/optim/AdaMuon_adv.py +14 -5
- adv_optm/optim/AdamW_adv.py +9 -2
- adv_optm/optim/Adopt_adv.py +9 -2
- adv_optm/optim/Lion_Prodigy_adv.py +8 -1
- adv_optm/optim/Lion_adv.py +8 -1
- adv_optm/optim/Muon_adv.py +16 -9
- adv_optm/optim/Prodigy_adv.py +9 -2
- adv_optm/optim/Simplified_AdEMAMix.py +9 -2
- adv_optm/util/BF16_Stochastic_Rounding.py +29 -4
- adv_optm/util/Kourkoutas.py +12 -6
- {adv_optm-1.2.0.dist-info → adv_optm-1.2.4.dist-info}/METADATA +1 -1
- adv_optm-1.2.4.dist-info/RECORD +23 -0
- adv_optm-1.2.0.dist-info/RECORD +0 -23
- {adv_optm-1.2.0.dist-info → adv_optm-1.2.4.dist-info}/WHEEL +0 -0
- {adv_optm-1.2.0.dist-info → adv_optm-1.2.4.dist-info}/licenses/LICENSE +0 -0
- {adv_optm-1.2.0.dist-info → adv_optm-1.2.4.dist-info}/top_level.txt +0 -0
adv_optm/__init__.py
CHANGED
adv_optm/optim/AdaMuon_adv.py
CHANGED
@@ -1,6 +1,6 @@
 import torch
 
-from ..util.BF16_Stochastic_Rounding import add_stochastic_
+from ..util.BF16_Stochastic_Rounding import add_stochastic_, set_seed as set_stochastic_rounding_seed
 from ..util.Newton_Schulz import _newton_schulz_iteration
 from ..util.Effective_Shape import _get_effective_shape
 from ..util.NNMF import _nnmf,_unnmf
@@ -184,6 +184,13 @@ class AdaMuon_adv(torch.optim.Optimizer):
             torch._dynamo.config.cache_size_limit = 8192
             self.compile(fullgraph=True)
 
+        if self.stochastic_rounding:
+            # For deterministic stochastic rounding, we need to seed the generator
+            # for each device used by the parameters.
+            devices = {p.device for group in self.param_groups for p in group['params'] if p.dtype == torch.bfloat16}
+            for device in devices:
+                set_stochastic_rounding_seed(device)
+
     @property
     def supports_fused_back_pass(self):
         return True
@@ -241,6 +248,8 @@ class AdaMuon_adv(torch.optim.Optimizer):
             elif len(p.shape) >= 2:
                 state['normuon_v'] = torch.zeros(p.shape[0], device=p.device, dtype=torch.float32)
 
+                group['adam_kourkoutas_beta'] = False
+
         elif optim_type == 'adam':
 
             state['step'] = 0
@@ -441,8 +450,6 @@ class AdaMuon_adv(torch.optim.Optimizer):
                 )
                 del signed_m_buf
 
-                update = update.view(original_shape)
-
                 if group['normuon_variant']:
                     # NorMuon Logic
                     v_t = state['normuon_v']
@@ -452,7 +459,9 @@ class AdaMuon_adv(torch.optim.Optimizer):
                     # Normalize update
                     update.div_(v_t.sqrt().unsqueeze(1).add_(group['eps']))
                     del mean_squared_update
+                    update = update.view(original_shape)
                 else:
+                    update = update.view(original_shape)
                     # Original AdaMuon Logic
                     vt_buf = state['second_momentum_buffer']
                     vt_buf.mul_(beta2).addcmul_(update, update, value=1 - beta2)
@@ -470,10 +479,10 @@ class AdaMuon_adv(torch.optim.Optimizer):
                 if group['rms_rescaling']:
                     rms_target = 0.2 # default (Adam) value for RMS
                     update_norm = torch.linalg.vector_norm(update)
-                    update
+                    update.mul_(rms_target * lr * (p.numel()**0.5) / update_norm.add_(1e-8))
                     del update_norm
                 else:
-                    update
+                    update.mul_(lr)
 
             else: # Fallback to standard SGD with momentum for 1D params (biases, etc.)
                 # Momentum update
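The constructor hook added above is repeated in every optimizer of this release: collect the devices of all bfloat16 parameters and seed one rounding generator per device. Below is a minimal standalone sketch of that pattern; the stand-in set_seed helper and the toy param_groups layout are assumptions for illustration, not the package's internals.

import torch

# Stand-in for adv_optm.util.BF16_Stochastic_Rounding.set_seed (assumption):
# one fixed-seed generator per device keeps stochastic rounding reproducible.
_generators = {}

def set_seed(device: torch.device, seed: int = 42):
    gen = _generators.setdefault(device, torch.Generator(device=device))
    gen.manual_seed(seed)

# Toy param_groups mirroring the shape of torch.optim.Optimizer.param_groups.
param_groups = [
    {"params": [torch.zeros(4, 4, dtype=torch.bfloat16),
                torch.zeros(8, dtype=torch.float32)]},
]

# Same device-collection logic as the hunk above: only bf16 parameters are
# updated with stochastic rounding, so only their devices need a generator.
devices = {p.device for group in param_groups for p in group["params"]
           if p.dtype == torch.bfloat16}
for device in devices:
    set_seed(device)

print(sorted(str(d) for d in _generators))  # ['cpu'] when run on CPU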
adv_optm/optim/AdamW_adv.py
CHANGED
@@ -1,7 +1,7 @@
 import torch
 from typing import Optional, Callable
 
-from ..util.BF16_Stochastic_Rounding import add_stochastic_
+from ..util.BF16_Stochastic_Rounding import add_stochastic_, set_seed as set_stochastic_rounding_seed
 from ..util.Effective_Shape import _get_effective_shape
 from ..util.NNMF import _nnmf,_unnmf
 from ..util.OrthoGrad import _orthogonalize_gradient
@@ -142,6 +142,13 @@ class AdamW_adv(torch.optim.Optimizer):
         if self.kourkoutas_beta:
             self.kourkoutas_helper = KourkoutasHelper(self)
 
+        if self.stochastic_rounding:
+            # For deterministic stochastic rounding, we need to seed the generator
+            # for each device used by the parameters.
+            devices = {p.device for group in self.param_groups for p in group['params'] if p.dtype == torch.bfloat16}
+            for device in devices:
+                set_stochastic_rounding_seed(device)
+
     @property
     def supports_fused_back_pass(self):
         return True
@@ -215,7 +222,7 @@ class AdamW_adv(torch.optim.Optimizer):
             # Accumulate current grad's norm for the *next* step
             self.kourkoutas_helper.accumulate_gradient_sq_norm(p, grad)
             # Get the dynamic beta2 calculated in prepare_step()
-            beta2 = self.kourkoutas_helper.get_beta2(p, group
+            beta2 = self.kourkoutas_helper.get_beta2(p, group)
 
         step = state['step'] + 1
         if group['use_bias_correction']:
adv_optm/optim/Adopt_adv.py
CHANGED
@@ -1,7 +1,7 @@
 import torch
 from typing import Callable, Optional
 
-from ..util.BF16_Stochastic_Rounding import add_stochastic_
+from ..util.BF16_Stochastic_Rounding import add_stochastic_, set_seed as set_stochastic_rounding_seed
 from ..util.Effective_Shape import _get_effective_shape
 from ..util.NNMF import _nnmf, _unnmf
 from ..util.OrthoGrad import _orthogonalize_gradient
@@ -172,6 +172,13 @@ class Adopt_adv(torch.optim.Optimizer):
         if self.kourkoutas_beta:
             self.kourkoutas_helper = KourkoutasHelper(self)
 
+        if self.stochastic_rounding:
+            # For deterministic stochastic rounding, we need to seed the generator
+            # for each device used by the parameters.
+            devices = {p.device for group in self.param_groups for p in group['params'] if p.dtype == torch.bfloat16}
+            for device in devices:
+                set_stochastic_rounding_seed(device)
+
     @property
     def supports_fused_back_pass(self): return True
     @property
@@ -243,7 +250,7 @@ class Adopt_adv(torch.optim.Optimizer):
             # Accumulate current grad's norm for the *next* step
             self.kourkoutas_helper.accumulate_gradient_sq_norm(p, grad)
             # Get the dynamic beta2 calculated in prepare_step()
-            beta2 = self.kourkoutas_helper.get_beta2(p, group
+            beta2 = self.kourkoutas_helper.get_beta2(p, group)
 
         # The first step is for initialization only (skip when use_atan2 as it's scale invariant).
         if state['step'] == 0 and not self.use_atan2:
adv_optm/optim/Lion_Prodigy_adv.py
CHANGED
@@ -5,7 +5,7 @@ import math
 
 from typing import Tuple, Optional
 
-from ..util.BF16_Stochastic_Rounding import add_stochastic_
+from ..util.BF16_Stochastic_Rounding import add_stochastic_, set_seed as set_stochastic_rounding_seed
 from ..util.Effective_Shape import _get_effective_shape
 from ..util.NNMF import _nnmf,_unnmf
 from ..util.OrthoGrad import _orthogonalize_gradient
@@ -109,6 +109,13 @@ class Lion_Prodigy_adv(torch.optim.Optimizer):
         # Global state for accumulating metrics across parameter updates within a single step.
         self.init_step()
 
+        if self.stochastic_rounding:
+            # For deterministic stochastic rounding, we need to seed the generator
+            # for each device used by the parameters.
+            devices = {p.device for group in self.param_groups for p in group['params'] if p.dtype == torch.bfloat16}
+            for device in devices:
+                set_stochastic_rounding_seed(device)
+
     @property
     def supports_fused_back_pass(self) -> bool:
         return True
adv_optm/optim/Lion_adv.py
CHANGED
@@ -2,7 +2,7 @@ import torch
 
 from typing import Tuple, Optional
 
-from ..util.BF16_Stochastic_Rounding import add_stochastic_
+from ..util.BF16_Stochastic_Rounding import add_stochastic_, set_seed as set_stochastic_rounding_seed
 from ..util.Effective_Shape import _get_effective_shape
 from ..util.NNMF import _nnmf,_unnmf
 from ..util.OrthoGrad import _orthogonalize_gradient
@@ -68,6 +68,13 @@ class Lion_adv(torch.optim.Optimizer):
         self.factored = nnmf_factor
         super().__init__(params, defaults)
 
+        if self.stochastic_rounding:
+            # For deterministic stochastic rounding, we need to seed the generator
+            # for each device used by the parameters.
+            devices = {p.device for group in self.param_groups for p in group['params'] if p.dtype == torch.bfloat16}
+            for device in devices:
+                set_stochastic_rounding_seed(device)
+
     @property
     def supports_fused_back_pass(self) -> bool:
         return True
adv_optm/optim/Muon_adv.py
CHANGED
@@ -1,6 +1,6 @@
 import torch
 
-from ..util.BF16_Stochastic_Rounding import add_stochastic_
+from ..util.BF16_Stochastic_Rounding import add_stochastic_, set_seed as set_stochastic_rounding_seed
 from ..util.Newton_Schulz import _newton_schulz_iteration
 from ..util.Effective_Shape import _get_effective_shape
 from ..util.NNMF import _nnmf,_unnmf
@@ -186,6 +186,13 @@ class Muon_adv(torch.optim.Optimizer):
             torch._dynamo.config.cache_size_limit = 8192
             self.compile(fullgraph=True)
 
+        if self.stochastic_rounding:
+            # For deterministic stochastic rounding, we need to seed the generator
+            # for each device used by the parameters.
+            devices = {p.device for group in self.param_groups for p in group['params'] if p.dtype == torch.bfloat16}
+            for device in devices:
+                set_stochastic_rounding_seed(device)
+
     @property
     def supports_fused_back_pass(self):
         return True
@@ -363,14 +370,14 @@ class Muon_adv(torch.optim.Optimizer):
                     update.div_(v_t.sqrt().unsqueeze(1).add_(group['normuon_eps']))
                     del mean_squared_update
 
-
-
-
-
-
-
-
-
+                # RMS-aligned rescaling
+                if group['rms_rescaling']:
+                    rms_target = 0.2 # default (Adam) value for RMS
+                    update_norm = torch.linalg.vector_norm(update)
+                    update = update.view(p.shape).mul_(rms_target * lr * (p.numel()**0.5) / update_norm.add_(1e-8))
+                    del update_norm
+                else:
+                    update = update.view(p.shape).mul_(lr)
 
                 state['sign_buf'] = _pack_bools(mt_buf > 0)
                 _nnmf(mt_buf.abs(), out=(state['mu_mbuf_nmf'], state['mv_mbuf_nmf']))
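The new rms_rescaling branch scales the orthogonalized update so that its root-mean-square equals rms_target (0.2, the value typical of Adam updates) times the learning rate: since RMS(u) = ||u|| / sqrt(numel), multiplying by rms_target * lr * sqrt(numel) / ||u|| achieves exactly that. A small sketch of just this scaling step, using illustrative names rather than the optimizer's internals:

import torch

def rms_rescale(update: torch.Tensor, lr: float, rms_target: float = 0.2) -> torch.Tensor:
    """Scale `update` so that its RMS equals rms_target * lr.

    RMS(u) = ||u||_2 / sqrt(numel), so the factor below makes
    RMS(result) == rms_target * lr (up to the 1e-8 guard).
    """
    update_norm = torch.linalg.vector_norm(update)
    return update * (rms_target * lr * (update.numel() ** 0.5) / (update_norm + 1e-8))

u = torch.randn(128, 64)
scaled = rms_rescale(u, lr=1e-3)
# RMS of the rescaled update is ~ rms_target * lr
print(scaled.pow(2).mean().sqrt())  # ≈ 2e-4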
adv_optm/optim/Prodigy_adv.py
CHANGED
@@ -5,7 +5,7 @@ import math
 
 from typing import Optional, Callable
 
-from ..util.BF16_Stochastic_Rounding import add_stochastic_
+from ..util.BF16_Stochastic_Rounding import add_stochastic_, set_seed as set_stochastic_rounding_seed
 from ..util.Effective_Shape import _get_effective_shape
 from ..util.NNMF import _nnmf,_unnmf
 from ..util.OrthoGrad import _orthogonalize_gradient
@@ -212,6 +212,13 @@ class Prodigy_adv(torch.optim.Optimizer):
             self.kourkoutas_helper = KourkoutasHelper(self)
         self.init_step()
 
+        if self.stochastic_rounding:
+            # For deterministic stochastic rounding, we need to seed the generator
+            # for each device used by the parameters.
+            devices = {p.device for group in self.param_groups for p in group['params'] if p.dtype == torch.bfloat16}
+            for device in devices:
+                set_stochastic_rounding_seed(device)
+
     @property
     def supports_fused_back_pass(self):
         return True
@@ -310,7 +317,7 @@ class Prodigy_adv(torch.optim.Optimizer):
             # Accumulate current grad's norm for the *next* step
             self.kourkoutas_helper.accumulate_gradient_sq_norm(p, grad)
             # Get the dynamic beta2 calculated in prepare_step()
-            beta2 = self.kourkoutas_helper.get_beta2(p, group
+            beta2 = self.kourkoutas_helper.get_beta2(p, group)
         else:
             beta2 = self.beta2_default
 
adv_optm/optim/Simplified_AdEMAMix.py
CHANGED
@@ -3,7 +3,7 @@ from typing import Optional, Callable
 
 import math
 
-from ..util.BF16_Stochastic_Rounding import add_stochastic_
+from ..util.BF16_Stochastic_Rounding import add_stochastic_, set_seed as set_stochastic_rounding_seed
 from ..util.Effective_Shape import _get_effective_shape
 from ..util.NNMF import _nnmf,_unnmf
 from ..util.OrthoGrad import _orthogonalize_gradient
@@ -127,6 +127,13 @@ class Simplified_AdEMAMix(torch.optim.Optimizer):
         if self.kourkoutas_beta:
             self.kourkoutas_helper = KourkoutasHelper(self)
 
+        if self.stochastic_rounding:
+            # For deterministic stochastic rounding, we need to seed the generator
+            # for each device used by the parameters.
+            devices = {p.device for group in self.param_groups for p in group['params'] if p.dtype == torch.bfloat16}
+            for device in devices:
+                set_stochastic_rounding_seed(device)
+
     @property
     def supports_fused_back_pass(self):
         return True
@@ -197,7 +204,7 @@ class Simplified_AdEMAMix(torch.optim.Optimizer):
             # Accumulate current grad's norm for the *next* step
             self.kourkoutas_helper.accumulate_gradient_sq_norm(p, grad)
             # Get the dynamic beta2 calculated in prepare_step()
-            beta2 = self.kourkoutas_helper.get_beta2(p, group
+            beta2 = self.kourkoutas_helper.get_beta2(p, group)
 
         beta1_warmup = group["beta1_warmup"]
         alpha_grad = group["alpha_grad"]
adv_optm/util/BF16_Stochastic_Rounding.py
CHANGED
@@ -1,10 +1,25 @@
 import torch
 from torch import Tensor
 
+from typing import Dict, Any
+
+_generators: Dict[torch.device, torch.Generator] = {}
+
+def set_seed(device: torch.device):
+    """
+    Initializes or resets the deterministic generator for a specific device.
+    This ensures that the sequence of random numbers used for stochastic
+    rounding is reproducible.
+    """
+    global _generators
+    if device not in _generators:
+        _generators[device] = torch.Generator(device=device)
+    _generators[device].manual_seed(42)
+
 def copy_stochastic_(target: Tensor, source: Tensor):
     """
     Nerogar's implementation of stochastic rounding in the paper "Revisiting BFloat16 Training"
-    (https://arxiv.org/abs/2010.06192).
+    (https://arxiv.org/abs/2010.06192). Made deterministic.
     see:
     https://github.com/pytorch/pytorch/issues/120376
     https://github.com/Nerogar/OneTrainer/blob/daae18eaed8c0fa39289b2ff79cc2c1e08577fcb/modules/util/bf16_stochastic_rounding.py
@@ -13,12 +28,21 @@ def copy_stochastic_(target: Tensor, source: Tensor):
     target: the target tensor with dtype=bfloat16
     source: the target tensor with dtype=float32
     """
+    global _generators
+    device = source.device
+    if device not in _generators:
+        set_seed(device)
+
+    generator = _generators[device]
+
     # create a random 16 bit integer
-    result = torch.
-        source,
+    result = torch.randint(
+        size=source.shape,
+        device=source.device,
         dtype=torch.int32,
         low=0,
         high=(1 << 16),
+        generator=generator,
     )
 
     # add the random number to the lower 16 bit of the mantissa
@@ -32,6 +56,7 @@ def copy_stochastic_(target: Tensor, source: Tensor):
 
     del result
 
+
 def add_stochastic_(input: Tensor, other: Tensor, alpha: float = 1.0):
     """
     adds other to input using stochastic rounding
@@ -44,4 +69,4 @@ def add_stochastic_(input: Tensor, other: Tensor, alpha: float = 1.0):
     result = other.clone() if other.dtype == torch.float32 else other.to(dtype=torch.float32)
 
     result.add_(input, alpha=alpha)
-    copy_stochastic_(input, result)
+    copy_stochastic_(input, result)
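Taken together, the module-level _generators dict and set_seed make the fp32-to-bf16 rounding decision reproducible: the random 16-bit value added to the low mantissa bits now comes from a per-device, fixed-seed torch.Generator instead of the global RNG. A self-contained sketch of the same idea, simplified and not the package's exact code:

import torch

_generators: dict = {}

def _generator_for(device: torch.device) -> torch.Generator:
    # One fixed-seed generator per device -> reproducible rounding decisions.
    if device not in _generators:
        g = torch.Generator(device=device)
        g.manual_seed(42)
        _generators[device] = g
    return _generators[device]

def stochastic_round_to_bf16(source: torch.Tensor) -> torch.Tensor:
    """Round an fp32 tensor to bf16 stochastically, using the seeded generator."""
    noise = torch.randint(
        low=0, high=(1 << 16), size=source.shape,
        dtype=torch.int32, device=source.device,
        generator=_generator_for(source.device),
    )
    # Add the noise to the low 16 mantissa bits, then clear them: values
    # closer to the next representable bf16 number round up more often.
    bits = source.view(torch.int32) + noise
    bits &= -65536  # keep only the top 16 bits (0xFFFF0000 as signed int32)
    return bits.view(torch.float32).to(torch.bfloat16)

x = torch.full((1000,), 1.0 + 2**-10)  # sits between two bf16 values
torch.manual_seed(0)                    # no effect: rounding uses its own generator
y = stochastic_round_to_bf16(x)
print(y.float().mean())                 # ≈ 1.0 + 2**-10 on average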
adv_optm/util/Kourkoutas.py
CHANGED
@@ -88,11 +88,17 @@ class KourkoutasHelper:
         # Use group-specific K-b settings, falling back to the optimizer's master defaults.
         # This makes the helper robust against param groups that enable kourkoutas_beta
         # but are missing the other required hyperparameters.
-
-
-
-
-
+        # In hybrid optimizers like Muon_adv, the Kourkoutas-related keys in the
+        # defaults and param_groups are prefixed with 'adam_' to avoid conflicts.
+        # We must detect this case and use the correct key names.
+        prefix = 'adam_' if group.get('adam_kourkoutas_beta', False) else ''
+
+        ema_alpha = group.get(f'{prefix}ema_alpha', master_defaults[f'{prefix}ema_alpha'])
+        betas_tuple = group.get(f'{prefix}betas', master_defaults[f'{prefix}betas'])
+        beta2_max = betas_tuple[1]
+        beta2_min = group.get(f'{prefix}beta2_min', master_defaults[f'{prefix}beta2_min'])
+        tiny_spike = group.get(f'{prefix}tiny_spike', master_defaults[f'{prefix}tiny_spike'])
+        k_warmup_steps = group.get(f'{prefix}k_warmup_steps', master_defaults[f'{prefix}k_warmup_steps'])
 
         r_ema_tensor = param_state['kourkoutas_r_ema']
         accumulator = self.layer_state[layer_key]['sum_sq_accumulator']
@@ -149,7 +155,7 @@ class KourkoutasHelper:
         # Accumulate for the *next* step's prepare_step call
         self.layer_state[layer_key]['sum_sq_accumulator'] += torch.sum(grad.detach().pow(2)).float()
 
-    def get_beta2(self, p: torch.Tensor, group: dict
+    def get_beta2(self, p: torch.Tensor, group: dict) -> float:
        """
        Gets the appropriate beta2 for the current parameter, handling warmup and dynamic value fetching.
        """
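The rewritten block resolves the 'adam_'-prefixed key names that hybrid optimizers such as Muon_adv use for their Kourkoutas hyperparameters, falling back to the optimizer's master defaults whenever a param group omits a key. A minimal sketch of that lookup; the group and master_defaults dictionaries below are illustrative stand-ins, not the helper's real state:

# Hypothetical defaults/group dicts, mirroring the key layout the helper expects.
master_defaults = {
    "adam_kourkoutas_beta": True,
    "adam_ema_alpha": 0.98,
    "adam_betas": (0.9, 0.999),
    "adam_beta2_min": 0.9,
    "adam_tiny_spike": 1e-9,
    "adam_k_warmup_steps": 0,
}
group = {"adam_kourkoutas_beta": True, "adam_beta2_min": 0.95}  # partial override

# Same resolution logic as the hunk above: pick the 'adam_' prefix when the
# group enables the hybrid flag, then fall back to master defaults per key.
prefix = "adam_" if group.get("adam_kourkoutas_beta", False) else ""

ema_alpha = group.get(f"{prefix}ema_alpha", master_defaults[f"{prefix}ema_alpha"])
beta2_max = group.get(f"{prefix}betas", master_defaults[f"{prefix}betas"])[1]
beta2_min = group.get(f"{prefix}beta2_min", master_defaults[f"{prefix}beta2_min"])

print(prefix, ema_alpha, beta2_max, beta2_min)  # adam_ 0.98 0.999 0.95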
adv_optm-1.2.4.dist-info/RECORD
ADDED
@@ -0,0 +1,23 @@
+adv_optm/__init__.py,sha256=fxQlsNgh5Su63jHu4hPZt_1NCsoFuEsZmAa7cvUn3I0,376
+adv_optm/optim/AdaMuon_adv.py,sha256=miib3NlnBZyT0K4wLliR7I9Vw4xsVdC3ewWfgP88mxE,34686
+adv_optm/optim/AdamW_adv.py,sha256=ZvZkqOIqT_013sCqRoL4drEKCCXbsQY-JRrRngoN9f8,18068
+adv_optm/optim/Adopt_adv.py,sha256=8es_ot1EgJa3SZHfKQ_PU4fYM4TJMAGOmOGq_876IOs,21870
+adv_optm/optim/Lion_Prodigy_adv.py,sha256=nns9Oz_0EKmGRN8p6kWMlRWKI-tHx8v8eg8TH-hXjJY,15047
+adv_optm/optim/Lion_adv.py,sha256=ug4uuQk3PmdkggsuzqNpZ6vieLUAbTHGr1Q_pvuLLVs,8729
+adv_optm/optim/Muon_adv.py,sha256=HEbyFYak4aDRfxJwwKD7PlvHXTE2TOpadWO0vRVnNf8,34119
+adv_optm/optim/Prodigy_adv.py,sha256=Wiukv1Hn6KFSslI6Dk4QXFFwNNtRjQsJ4GNEYkC4dFc,26662
+adv_optm/optim/Simplified_AdEMAMix.py,sha256=1j3M9t0Dza2dLVabwC0ft36sANx-QHBeLRp2WJlU_3s,13387
+adv_optm/optim/__init__.py,sha256=hpUWE6CKtt_rvMdgQVb3PtjhfZAvAxTq6hp8H8rIpBo,489
+adv_optm/util/BF16_Stochastic_Rounding.py,sha256=VXfv7U4-Yhyq1o6gZMApvW1DUUwZ15-eob98daQW9uc,2288
+adv_optm/util/Effective_Shape.py,sha256=TBvIk1V8IuTbbBsxuekJA4e_v8JlR5Nujtut8RTWAm4,318
+adv_optm/util/Kourkoutas.py,sha256=eSE2KUnvbxP2Kg4sUCFmqGLvX5eov4OUjULKBKHBLoc,8131
+adv_optm/util/NNMF.py,sha256=yRf5IP5Sjq0Uf0DxN0Q8NxEGSdD-f1ULziLVDOjY8K4,639
+adv_optm/util/Newton_Schulz.py,sha256=bBboYw_jm5_FMf0Citl79uqNedkHOTjQnUI7rZgLBmY,3341
+adv_optm/util/One_Bit_Boolean.py,sha256=Wat49esdwohuN-OHOFMW8D0aOQgV9cP5Rl8z6yfmpos,1068
+adv_optm/util/OrthoGrad.py,sha256=NzInuBQGy_Ja__M1R9XbvqVaQ0fhGbtGgFE9YON7B3I,707
+adv_optm/util/__init__.py,sha256=CXzS703GB4gil85khZi7sgKOnbzXGBOltshIOSPqj18,435
+adv_optm-1.2.4.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+adv_optm-1.2.4.dist-info/METADATA,sha256=FUjhBc16Ab58N06TRXq7087T9EwZnZqtLbw5niYzIS4,11917
+adv_optm-1.2.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+adv_optm-1.2.4.dist-info/top_level.txt,sha256=iNfBIIzu-lPrQ7jyC56WBCcbkRwitM2nJ15-MRQ_6fg,9
+adv_optm-1.2.4.dist-info/RECORD,,
adv_optm-1.2.0.dist-info/RECORD
DELETED
@@ -1,23 +0,0 @@
-adv_optm/__init__.py,sha256=lQwVoYMSCofAxJ_CadX2NssB1jldn9JetoyAVMJPDrs,376
-adv_optm/optim/AdaMuon_adv.py,sha256=sqMd1cdBDMpwVmEoU1w3vE_Fj3nfx-_bZjf8mf5st4Y,34189
-adv_optm/optim/AdamW_adv.py,sha256=pDKwdOV90qxTkRuIez0kU_VdI0ztJygY-MxhhQT10Yw,17652
-adv_optm/optim/Adopt_adv.py,sha256=eSLJS0RVJ0MAE5pMFK-Q00vJF6NuxKJbefAg8F58XD4,21454
-adv_optm/optim/Lion_Prodigy_adv.py,sha256=LEA3UYJpPeFnmxeniLNv1u2LKKj4ufx3Bq_MLw-nWXk,14617
-adv_optm/optim/Lion_adv.py,sha256=aGNAplZlyXYgVllYcV_s4bK8iC4fv6EizFoWIMNLdBc,8299
-adv_optm/optim/Muon_adv.py,sha256=_odkBjwIuY895fh6wAs_9ljXyyPCg9V-tAQnjMVf4Po,33721
-adv_optm/optim/Prodigy_adv.py,sha256=H0xuVhaCDJF6ilts_It20teZZCN4MSbOSPQ-fsy6pEg,26246
-adv_optm/optim/Simplified_AdEMAMix.py,sha256=nEIA3yM11nBooKzHudB5l3x4UdFRBYRwiKVUkGmO0K8,12971
-adv_optm/optim/__init__.py,sha256=hpUWE6CKtt_rvMdgQVb3PtjhfZAvAxTq6hp8H8rIpBo,489
-adv_optm/util/BF16_Stochastic_Rounding.py,sha256=Q5H0BcogmE4atP65dLoI21HKSf50lRdsBDfeF6v9Tbg,1548
-adv_optm/util/Effective_Shape.py,sha256=TBvIk1V8IuTbbBsxuekJA4e_v8JlR5Nujtut8RTWAm4,318
-adv_optm/util/Kourkoutas.py,sha256=C_Qn6I0Qao_9D_nCv4ZYmC_SgJLoPwhrMb5FkRQ-k1M,7693
-adv_optm/util/NNMF.py,sha256=yRf5IP5Sjq0Uf0DxN0Q8NxEGSdD-f1ULziLVDOjY8K4,639
-adv_optm/util/Newton_Schulz.py,sha256=bBboYw_jm5_FMf0Citl79uqNedkHOTjQnUI7rZgLBmY,3341
-adv_optm/util/One_Bit_Boolean.py,sha256=Wat49esdwohuN-OHOFMW8D0aOQgV9cP5Rl8z6yfmpos,1068
-adv_optm/util/OrthoGrad.py,sha256=NzInuBQGy_Ja__M1R9XbvqVaQ0fhGbtGgFE9YON7B3I,707
-adv_optm/util/__init__.py,sha256=CXzS703GB4gil85khZi7sgKOnbzXGBOltshIOSPqj18,435
-adv_optm-1.2.0.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
-adv_optm-1.2.0.dist-info/METADATA,sha256=qsz3TfHskcMBhNRYT_YD58_pr2kfFdsm8LQ5WOXoBlE,11917
-adv_optm-1.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-adv_optm-1.2.0.dist-info/top_level.txt,sha256=iNfBIIzu-lPrQ7jyC56WBCcbkRwitM2nJ15-MRQ_6fg,9
-adv_optm-1.2.0.dist-info/RECORD,,
{adv_optm-1.2.0.dist-info → adv_optm-1.2.4.dist-info}/WHEEL
File without changes
{adv_optm-1.2.0.dist-info → adv_optm-1.2.4.dist-info}/licenses/LICENSE
File without changes
{adv_optm-1.2.0.dist-info → adv_optm-1.2.4.dist-info}/top_level.txt
File without changes