mmgp 3.2.7.tar.gz → 3.2.8.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mmgp might be problematic.

PKG-INFO:

@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: mmgp
- Version: 3.2.7
+ Version: 3.2.8
  Summary: Memory Management for the GPU Poor
  Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
  License: GNU GENERAL PUBLIC LICENSE
@@ -16,7 +16,7 @@ Requires-Dist: psutil


  <p align="center">
- <H2>Memory Management 3.2.7 for the GPU Poor by DeepBeepMeep</H2>
+ <H2>Memory Management 3.2.8 for the GPU Poor by DeepBeepMeep</H2>
  </p>

README.md:

@@ -1,6 +1,6 @@

  <p align="center">
- <H2>Memory Management 3.2.7 for the GPU Poor by DeepBeepMeep</H2>
+ <H2>Memory Management 3.2.8 for the GPU Poor by DeepBeepMeep</H2>
  </p>

pyproject.toml:

@@ -1,6 +1,6 @@
  [project]
  name = "mmgp"
- version = "3.2.7"
+ version = "3.2.8"
  authors = [
      { name = "deepbeepmeep", email = "deepbeepmeep@yahoo.com" },
  ]
mmgp/offload.py:

@@ -1,4 +1,4 @@
- # ------------------ Memory Management 3.2.7 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+ # ------------------ Memory Management 3.2.8 for the GPU Poor by DeepBeepMeep (mmgp)------------------
  #
  # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
  # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -575,7 +575,7 @@ def _welcome():
      if welcome_displayed:
          return
      welcome_displayed = True
-     print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.7) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+     print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.8) by DeepBeepMeep ************{ENDC}{UNBOLD}")

  def _extract_num_from_str(num_in_str):
      size = len(num_in_str)
@@ -910,6 +910,9 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
          skip = False
          state_dict = safetensors2.torch_load_file(path)

+
+
+
          if preprocess_sd != None:
              state_dict = preprocess_sd(state_dict)

@@ -947,42 +950,34 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
          # tied_weights = _extract_tie_weights_from_sd(state_dict, path) # to do

          clean_up = False
-         keys = list(state_dict.keys())
-         if len(keys) == 0:
+         first_key = next(iter(state_dict), None)
+         if first_key == None:
              msg = f"Empty Lora '{path}'"
              error_msg = append(error_msg, msg)
              fail = True

          if not fail:
-             network_alphas = {}
-             for k in keys:
-                 if "alpha" in k:
-                     alpha_value = state_dict.pop(k)
-                     if not ( (torch.is_tensor(alpha_value) and torch.is_floating_point(alpha_value)) or isinstance(
-                         alpha_value, float
-                     )):
-                         network_alphas[k] = torch.tensor( float(alpha_value.item() ) )
-
-             pos = keys[0].find(".")
-             prefix = keys[0][0:pos]
+             pos = first_key.find(".")
+             prefix = first_key[0:pos]
              if prefix not in ["diffusion_model", "transformer"]:
                  msg = f"No compatible weight was found in Lora file '{path}'. Please check that it is compatible with the Diffusers format."
                  error_msg = append(error_msg, msg)
                  fail = True

          if not fail:
+
              state_dict = { k[ len(prefix) + 1:]: v for k, v in state_dict.items() if k.startswith(prefix) }
-             rank = {}
              clean_up = True

-             # for key, val in state_dict.items():
-             #     if "lora_B" in key:
-             #         rank[key] = val.shape[1]
+             keys = list(state_dict.keys())

-             # if network_alphas is not None and len(network_alphas) >= 1:
-             #     alpha_keys = [k for k in network_alphas.keys() if k.startswith(prefix) and k.split(".")[0] == prefix]
-             #     network_alphas = {k.replace(f"{prefix}.", ""): v for k, v in network_alphas.items() if k in alpha_keys}
-             network_alphas = None
+             lora_alphas = {}
+             for k in keys:
+                 if "alpha" in k:
+                     alpha_value = state_dict.pop(k)
+                     if torch.is_tensor(alpha_value):
+                         alpha_value = float(alpha_value.item())
+                     lora_alphas[k] = alpha_value

          invalid_keys = []
          unexpected_keys = []
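
In 3.2.7 the alpha entries were collected into network_alphas before the prefix was stripped and then discarded (network_alphas = None); 3.2.8 strips the "diffusion_model"/"transformer" prefix first, converts each alpha to a plain Python float, and keeps it in lora_alphas for later use. A minimal sketch of the new extraction on a toy state dict (the key names and shapes below are invented for illustration):

import torch

# Toy LoRA state dict, as it looks after the prefix has been stripped.
state_dict = {
    "blocks.0.attn.q.lora_A.weight": torch.randn(4, 64),
    "blocks.0.attn.q.lora_B.weight": torch.randn(64, 4),
    "blocks.0.attn.q.alpha": torch.tensor(8.0),
}

lora_alphas = {}
for k in list(state_dict.keys()):
    if "alpha" in k:
        alpha_value = state_dict.pop(k)      # alpha entries are removed from the weight dict
        if torch.is_tensor(alpha_value):
            alpha_value = float(alpha_value.item())
        lora_alphas[k] = alpha_value

print(lora_alphas)   # {'blocks.0.attn.q.alpha': 8.0}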
@@ -1037,14 +1032,18 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
                  loras_module_data = dict()
                  loras_model_data[module] = loras_module_data
              loras_adapter_data = loras_module_data.get(adapter_name, None)
+             lora_A = None if lora_A == None else lora_A.to(torch.bfloat16)
+             lora_B = None if lora_B == None else lora_B.to(torch.bfloat16)
              if loras_adapter_data == None:
-                 loras_adapter_data = [lora_A, lora_B]
+                 alpha = lora_alphas.get(k[:-len("lora_X.weight")] + "alpha", 1.)
+                 loras_adapter_data = [lora_A, lora_B, alpha]
                  loras_module_data[adapter_name] = loras_adapter_data
              elif lora_A != None:
                  loras_adapter_data[0] = lora_A
              else:
                  loras_adapter_data[1] = lora_B
              lora_A, lora_B, v, loras_module_data, loras_adapter_data = None, None, None, None, None
+         lora_alphas = None

          if len(invalid_keys) > 0:
              msg = "Lora '{path}' contains non Lora keys '{trunc(invalid_keys,200)}'"
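
The alpha for a given layer is looked up by slicing the trailing "lora_A.weight" / "lora_B.weight" suffix off the weight key ("lora_X.weight" is just a placeholder of the same length) and appending "alpha"; files that define no alpha fall back to 1.0, which reproduces the old scaling. A worked example with a hypothetical key:

k = "blocks.0.attn.q.lora_A.weight"
alpha_key = k[:-len("lora_X.weight")] + "alpha"   # "blocks.0.attn.q.alpha"
alpha = lora_alphas.get(alpha_key, 1.)            # 1.0 when the file ships no alpha

Note that this hunk also casts lora_A / lora_B to torch.bfloat16 at load time, so the stored adapter data becomes a [lora_A, lora_B, alpha] triple rather than a pair.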
@@ -1548,10 +1547,10 @@ class offload:
          lora_data = lora_module.get(adapter, None)
          if lora_data == None:
              continue
-         lora_A, lora_B = lora_data
+         lora_A, lora_B, alpha = lora_data
          key = adapter + '_GPU'
          if to_GPU:
-             lora_module[key] = [lora_A.cuda(), lora_B.cuda()]
+             lora_module[key] = [lora_A.cuda(), lora_B.cuda(), alpha]
          elif key in lora_module:
              del lora_module[key]

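Because alpha is kept as a plain float rather than a tensor, moving an adapter to the GPU still only copies the two weight matrices; the scalar rides along unchanged. A sketch following the dict layout in the hunk above:

lora_A, lora_B, alpha = lora_module[adapter]
lora_module[adapter + '_GPU'] = [lora_A.cuda(), lora_B.cuda(), alpha]   # alpha is a float, no device copy needed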
@@ -1801,8 +1800,8 @@ class offload:
          data = loras_data.get(active_adapter + '_GPU', None)
          if data == None:
              continue
-         lora_A_weight, lora_B_weight = data
-         scaling = get_scaling(active_adapter)
+         lora_A_weight, lora_B_weight, alpha = data
+         scaling = get_scaling(active_adapter) * alpha
          weight.addmm_(lora_B_weight, lora_A_weight, alpha= scaling )
          # base_weight += scaling * lora_B_weight @ lora_A_weight

@@ -1822,9 +1821,9 @@ class offload:
          data = loras_data.get(active_adapter + '_GPU', None)
          if data == None:
              continue
-         lora_A, lora_B = data
+         lora_A, lora_B, alpha = data
          # dropout = self.lora_dropout[active_adapter]
-         scaling = get_scaling(active_adapter)
+         scaling = get_scaling(active_adapter) * alpha
          x = x.to(lora_A.dtype)

          if training:
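
On the unmerged path the same combined scaling is applied to the low-rank activation delta instead of being folded into the weight. A minimal sketch of the LoRA forward computation this corresponds to (the function name and argument layout here are my own, not mmgp's API):

import torch
import torch.nn.functional as F

def lora_linear(x, base_weight, lora_A, lora_B, multiplier, alpha):
    # y = x @ W.T + s * ((x @ A.T) @ B.T), with s = multiplier * alpha
    scaling = multiplier * alpha
    y = F.linear(x, base_weight)
    y = y + scaling * F.linear(F.linear(x.to(lora_A.dtype), lora_A), lora_B)
    return y

x = torch.randn(2, 32)
W, A, B = torch.randn(64, 32), torch.randn(4, 32), torch.randn(64, 4)
y = lora_linear(x, W, A, B, multiplier=1.0, alpha=8.0)   # shape (2, 64)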
@@ -2248,7 +2247,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, l
              print(f"Model '{model_id}' already pinned to reserved memory")
          else:
              _pin_to_memory(current_model, model_id, partialPinning= partialPinning, pinnedPEFTLora = pinnedPEFTLora, verboseLevel=verboseLevel)
-
+
          current_budget = model_budgets[model_id]
          cur_blocks_prefix, prev_blocks_name, cur_blocks_name,cur_blocks_seq, is_mod_seq = None, None, None, -1, False
          self.loaded_blocks[model_id] = None

(whitespace-only change)
PKG-INFO (second copy; in an sdist this is typically mmgp.egg-info/PKG-INFO):

@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: mmgp
- Version: 3.2.7
+ Version: 3.2.8
  Summary: Memory Management for the GPU Poor
  Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
  License: GNU GENERAL PUBLIC LICENSE
@@ -16,7 +16,7 @@ Requires-Dist: psutil


  <p align="center">
- <H2>Memory Management 3.2.7 for the GPU Poor by DeepBeepMeep</H2>
+ <H2>Memory Management 3.2.8 for the GPU Poor by DeepBeepMeep</H2>
  </p>

6 files without changes.