adv-optm 1.1.0.dev2__py3-none-any.whl → 1.1.0.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adv_optm/__init__.py +1 -1
- adv_optm/optim/Adopt_adv.py +435 -439
- adv_optm/optim/Lion_Prodigy_adv.py +315 -315
- adv_optm/optim/Prodigy_adv.py +4 -2
- adv_optm/util/Kourkoutas.py +33 -25
- {adv_optm-1.1.0.dev2.dist-info → adv_optm-1.1.0.dev4.dist-info}/METADATA +1 -1
- {adv_optm-1.1.0.dev2.dist-info → adv_optm-1.1.0.dev4.dist-info}/RECORD +10 -10
- {adv_optm-1.1.0.dev2.dist-info → adv_optm-1.1.0.dev4.dist-info}/WHEEL +0 -0
- {adv_optm-1.1.0.dev2.dist-info → adv_optm-1.1.0.dev4.dist-info}/licenses/LICENSE +0 -0
- {adv_optm-1.1.0.dev2.dist-info → adv_optm-1.1.0.dev4.dist-info}/top_level.txt +0 -0
adv_optm/util/Kourkoutas.py
CHANGED
@@ -11,25 +11,32 @@ class KourkoutasHelper:
         if not hasattr(optimizer, 'param_groups'):
             raise TypeError("optimizer must be a valid torch.optim.Optimizer instance.")
         self.optimizer = optimizer
-
+
         # State managed by the helper
         self.layer_state = {}
         self.layer_info = {}
         self._layer_info_built = False
         self._current_step_prepared = -1
 
+        # Store stats for external logging (e.g., TensorBoard)
+        self.last_beta2_stats = {}
+
+        # This ensures the map is complete before the first backward pass,
+        # making it compatible with fused back pass mechanisms.
+        self._build_layer_info_if_needed()
+
     def _build_layer_info_if_needed(self):
         """Builds a map of layers and the parameters they contain."""
         if self._layer_info_built:
             return
-
+
         if not hasattr(self.optimizer, 'layer_key_fn') or self.optimizer.layer_key_fn is None:
             print("Warning: KourkoutasHelper requires 'layer_key_fn' on the optimizer. Defaulting to tensor-wise (id).")
             self.optimizer.layer_key_fn = lambda p: id(p)
 
         for group in self.optimizer.param_groups:
             for p in group['params']:
-
+                # The mapping is static and should not depend on the presence of a gradient.
                 layer_key = self.optimizer.layer_key_fn(p)
                 if layer_key not in self.layer_info:
                     self.layer_info[layer_key] = {'params': [], 'group_ref': group}
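The constructor now builds the layer map eagerly, before the first backward pass, and the map falls back to per-tensor keys (id(p)) whenever no layer_key_fn is set on the optimizer. Below is a minimal sketch of supplying a coarser per-layer grouping instead, assuming only what the hunk shows (the helper reads a layer_key_fn attribute from the optimizer); the model and the param_to_layer mapping are hypothetical.

import torch

# Hypothetical two-layer model, used only for illustration.
model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.Linear(8, 2))

# Group each parameter under its parent module name ('0.weight' -> '0'),
# so a layer's weight and bias share one pooled statistic.
param_to_layer = {
    id(p): name.rsplit('.', 1)[0]
    for name, p in model.named_parameters()
}

# The hunk only shows that the helper reads optimizer.layer_key_fn;
# attaching it this way is an assumption, not documented package API.
# optimizer.layer_key_fn = lambda p: param_to_layer.get(id(p), id(p))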
@@ -46,14 +53,11 @@ class KourkoutasHelper:
         Calculates dynamic beta2 for all layers using the completed scalar accumulators
         from the PREVIOUS step. Should be called once at the start of an optimizer step.
         """
-
-
-        #
-
-        is_logging_step = k_logging_interval > 0 and (current_step + 1) % k_logging_interval == 0
+
+        beta2_log = []
+        # These are just for the sample log, initialize them
+        sun, pooled_grad_norm, r_ema = (torch.tensor(0.0),)*3
 
-        beta2_log = [] if is_logging_step else None
-        first_layer_key = next(iter(self.layer_info), None)
 
         for layer_key, info in self.layer_info.items():
             params, group = info['params'], info['group_ref']
|
|
|
65
69
|
}
|
|
66
70
|
|
|
67
71
|
layer_state = self.layer_state[layer_key]
|
|
68
|
-
|
|
72
|
+
|
|
69
73
|
# Use the completed accumulator from the previous step
|
|
70
74
|
pooled_grad_norm = torch.sqrt(layer_state['sum_sq_accumulator'])
|
|
71
|
-
|
|
75
|
+
|
|
72
76
|
r_ema = layer_state['r_ema_grad_norm']
|
|
73
|
-
|
|
74
|
-
|
|
77
|
+
|
|
75
78
|
# EMA is always updated, even during warmup
|
|
76
79
|
r_ema.mul_(group['ema_alpha']).add_(pooled_grad_norm, alpha=1.0 - group['ema_alpha'])
|
|
77
|
-
|
|
80
|
+
|
|
78
81
|
sun = torch.tensor(0.0, device=r_ema.device) # Default sun to 0 for warmup
|
|
79
82
|
beta2_max = group['betas'][1]
|
|
80
83
|
|
|
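The lines between this hunk and the next (81-88 of the file, unchanged and therefore not shown) are where the sunspike ratio and the dynamic beta2 are actually computed. Below is a hedged sketch of that calculation, inferred only from the names visible here (pooled_grad_norm, r_ema, sun, beta2_max); the beta2_min floor, the epsilon, and the exact interpolation are assumptions, not the package's verbatim code.

import torch

def sketch_dynamic_beta2(pooled_grad_norm, r_ema, beta2_max,
                         beta2_min=0.88, eps=1e-8):
    # "Sunspike": how sharply the pooled gradient norm spikes above its
    # running EMA (assumed form).
    raw = pooled_grad_norm / (r_ema + eps)
    sun = raw / (1.0 + raw)  # squashed into [0, 1)
    # A large spike pulls beta2 toward beta2_min (a faster-reacting second
    # moment); calm gradients keep beta2 near beta2_max.
    return beta2_max - sun * (beta2_max - beta2_min)

# Example: a 10x spike over the EMA drives beta2 well below beta2_max.
print(sketch_dynamic_beta2(torch.tensor(10.0), torch.tensor(1.0), 0.999))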
@@ -89,16 +92,22 @@
             layer_state['dynamic_beta2'] = beta2.item() if isinstance(beta2, torch.Tensor) else beta2
             layer_state['sum_sq_accumulator'].zero_()
 
-
-
-
-
-            print(f" - Grad Norm: {pooled_grad_norm.item():.4e}, Prev EMA: {prev_r_ema_val:.4e}, New EMA: {r_ema.item():.4e}")
-            print(f" - Sunspike: {sun.item():.4f}, Dynamic Beta2: {layer_state['dynamic_beta2']:.4f}")
-
-        if is_logging_step and beta2_log:
+            beta2_log.append(layer_state['dynamic_beta2'])
+
+        # Always compute stats for TensorBoard
+        if beta2_log:
             beta2_tensor = torch.tensor(beta2_log, device='cpu')
-
+            self.last_beta2_stats = {
+                'min': beta2_tensor.min().item(),
+                'max': beta2_tensor.max().item(),
+                'mean': beta2_tensor.mean().item(),
+            }
+
+        # Handle periodic console logging
+        k_logging_interval = self.optimizer.param_groups[0].get('k_logging', 0)
+        is_logging_step = k_logging_interval > 0 and (current_step + 1) % k_logging_interval == 0
+        if is_logging_step and self.last_beta2_stats:
+            print(f"[Kourkoutas-β Debug] Step {current_step + 1} Overall Beta2 Stats: Min={self.last_beta2_stats['min']:.4f}, Max={self.last_beta2_stats['max']:.4f}, Mean={self.last_beta2_stats['mean']:.4f}")
 
 
     def maybe_prepare_step(self, current_step: int):
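Since last_beta2_stats is now populated on every prepared step (console output stays gated behind the k_logging interval), an external training loop can scrape it for TensorBoard. A minimal sketch, assuming a KourkoutasHelper instance is reachable as helper; the tag names are arbitrary.

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter()

def log_beta2_stats(helper, step):
    # Per the hunk above, last_beta2_stats holds 'min', 'max', and 'mean';
    # it stays empty until the first prepared step.
    for name, value in helper.last_beta2_stats.items():
        writer.add_scalar(f"kourkoutas/beta2_{name}", value, step)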
@@ -113,7 +122,6 @@
         """
         Accumulates the squared L2 norm of a single gradient for the next step's calculation.
         """
-        self._build_layer_info_if_needed()
         layer_key = self.optimizer.layer_key_fn(p)
 
         if layer_key in self.layer_info:
{adv_optm-1.1.0.dev2.dist-info → adv_optm-1.1.0.dev4.dist-info}/RECORD
CHANGED
@@ -1,20 +1,20 @@
-adv_optm/__init__.py,sha256=
+adv_optm/__init__.py,sha256=H4E_1__pXxRu4PSgQCzGi7WuFqVjTfex2Yduz3B3peI,311
 adv_optm/optim/AdamW_adv.py,sha256=H4XlYZELwiFvXt0A9wMlRNiw9c8rmPMspHDCvR_SZIQ,17487
-adv_optm/optim/Adopt_adv.py,sha256=
-adv_optm/optim/Lion_Prodigy_adv.py,sha256=
+adv_optm/optim/Adopt_adv.py,sha256=0uMROjCw3wGOyp0ZX_xjwMVaXHJ395ifntcgY0MZt3M,21460
+adv_optm/optim/Lion_Prodigy_adv.py,sha256=xIrwibQ2i919EHEACLCrKe5JBnS-s2Ai35yeJ1Bn1MA,13159
 adv_optm/optim/Lion_adv.py,sha256=6G1CukJB_pC7l9HwFEuY1ydsNHZFabVmOvcHDsHHVuQ,8295
-adv_optm/optim/Prodigy_adv.py,sha256
+adv_optm/optim/Prodigy_adv.py,sha256=EeSfYu8IIeZX1Dk8MlD71vGOpMadtnW2iMhHxPDL2XQ,25574
 adv_optm/optim/Simplified_AdEMAMix.py,sha256=b4GaSI-TX6wFBqGxZeoJPbf2nVRCEtB3WVb1olDgY14,12980
 adv_optm/optim/__init__.py,sha256=pcP865H2j1tut2VfTUhzQh7V8TF_tzPjqFnjMfFed2k,382
 adv_optm/util/BF16_Stochastic_Rounding.py,sha256=Q5H0BcogmE4atP65dLoI21HKSf50lRdsBDfeF6v9Tbg,1548
 adv_optm/util/Effective_Shape.py,sha256=TBvIk1V8IuTbbBsxuekJA4e_v8JlR5Nujtut8RTWAm4,318
-adv_optm/util/Kourkoutas.py,sha256=
+adv_optm/util/Kourkoutas.py,sha256=st9hO2I0Xcby0LLq1MhxiEsPyNzEkNpJO_WfYvkioKg,6606
 adv_optm/util/NNMF.py,sha256=yRf5IP5Sjq0Uf0DxN0Q8NxEGSdD-f1ULziLVDOjY8K4,639
 adv_optm/util/One_Bit_Boolean.py,sha256=Wat49esdwohuN-OHOFMW8D0aOQgV9cP5Rl8z6yfmpos,1068
 adv_optm/util/OrthoGrad.py,sha256=NzInuBQGy_Ja__M1R9XbvqVaQ0fhGbtGgFE9YON7B3I,707
 adv_optm/util/__init__.py,sha256=qoyIF0jcLjs_vSEcsv36clw5LFNBEbifyXrrVxMH-G4,349
-adv_optm-1.1.0.
-adv_optm-1.1.0.
-adv_optm-1.1.0.
-adv_optm-1.1.0.
-adv_optm-1.1.0.
+adv_optm-1.1.0.dev4.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+adv_optm-1.1.0.dev4.dist-info/METADATA,sha256=Ue6x-vthnxradX5tH1ver4LVbWMEMmqPjMVO8KjTdhI,8427
+adv_optm-1.1.0.dev4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+adv_optm-1.1.0.dev4.dist-info/top_level.txt,sha256=iNfBIIzu-lPrQ7jyC56WBCcbkRwitM2nJ15-MRQ_6fg,9
+adv_optm-1.1.0.dev4.dist-info/RECORD,,
{adv_optm-1.1.0.dev2.dist-info → adv_optm-1.1.0.dev4.dist-info}/WHEEL
File without changes

{adv_optm-1.1.0.dev2.dist-info → adv_optm-1.1.0.dev4.dist-info}/licenses/LICENSE
File without changes

{adv_optm-1.1.0.dev2.dist-info → adv_optm-1.1.0.dev4.dist-info}/top_level.txt
File without changes