mmgp 3.3.3.tar.gz → 3.3.4.tar.gz
Potentially problematic release: this version of mmgp has been flagged in its registry; see the package's listing for details.
- {mmgp-3.3.3/src/mmgp.egg-info → mmgp-3.3.4}/PKG-INFO +2 -2
- {mmgp-3.3.3 → mmgp-3.3.4}/README.md +1 -1
- {mmgp-3.3.3 → mmgp-3.3.4}/pyproject.toml +1 -1
- {mmgp-3.3.3 → mmgp-3.3.4}/src/mmgp/offload.py +31 -27
- {mmgp-3.3.3 → mmgp-3.3.4/src/mmgp.egg-info}/PKG-INFO +2 -2
- {mmgp-3.3.3 → mmgp-3.3.4}/LICENSE.md +0 -0
- {mmgp-3.3.3 → mmgp-3.3.4}/setup.cfg +0 -0
- {mmgp-3.3.3 → mmgp-3.3.4}/src/__init__.py +0 -0
- {mmgp-3.3.3 → mmgp-3.3.4}/src/mmgp/__init__.py +0 -0
- {mmgp-3.3.3 → mmgp-3.3.4}/src/mmgp/safetensors2.py +0 -0
- {mmgp-3.3.3 → mmgp-3.3.4}/src/mmgp.egg-info/SOURCES.txt +0 -0
- {mmgp-3.3.3 → mmgp-3.3.4}/src/mmgp.egg-info/dependency_links.txt +0 -0
- {mmgp-3.3.3 → mmgp-3.3.4}/src/mmgp.egg-info/requires.txt +0 -0
- {mmgp-3.3.3 → mmgp-3.3.4}/src/mmgp.egg-info/top_level.txt +0 -0
{mmgp-3.3.3/src/mmgp.egg-info → mmgp-3.3.4}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.3.3
+Version: 3.3.4
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
@@ -17,7 +17,7 @@ Dynamic: license-file
 
 
 <p align="center">
-<H2>Memory Management 3.3.3 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.3.4 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
{mmgp-3.3.3 → mmgp-3.3.4}/src/mmgp/offload.py

@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.3.3 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.3.4 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -591,7 +591,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.3.3) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.3.4) by DeepBeepMeep ************{ENDC}{UNBOLD}")
 
 def _extract_num_from_str(num_in_str):
     size = len(num_in_str)
@@ -877,17 +877,15 @@ def split_linear_modules(model, map ):
 
 def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_loras = True, check_only = False, ignore_model_variations = False, pinnedLora = False, split_linear_modules_map = None, preprocess_sd = None, verboseLevel = -1,):
     verboseLevel = _compute_verbose_level(verboseLevel)
-    modules_dict = {k: v for k,v in model.named_modules()}
 
+    loras_model_data = getattr(model, "_loras_model_data", None)
+    if loras_model_data == None:
+        raise Exception(f"No Loras has been declared for this model while creating the corresponding offload object")
+
     if not check_only:
-
-
-
-        model._loras_active_adapters = loras_active_adapters
-        loras_scaling = dict()
-        model._loras_scaling = loras_scaling
-        loras_tied_weights = dict()
-        model._loras_tied_weights = loras_tied_weights
+        unload_loras_from_model(model)
+
+    modules_dict = {k: v for k,v in model.named_modules()}
 
     CrLf = '\r\n'
     error_msg = ""
@@ -927,9 +925,6 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
         skip = False
         state_dict = safetensors2.torch_load_file(path, writable_tensors= False)
 
-
-
-
         if preprocess_sd != None:
             state_dict = preprocess_sd(state_dict)
 
@@ -1045,9 +1040,10 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
                 break
             if not check_only:
                 loras_module_data = loras_model_data.get(module, None)
-
-
-
+                assert loras_module_data != None
+                # if loras_module_data == None:
+                #     loras_module_data = dict()
+                #     loras_model_data[module] = loras_module_data
                 loras_adapter_data = loras_module_data.get(adapter_name, None)
                 lora_A = None if lora_A == None else lora_A.to(torch.bfloat16)
                 lora_B = None if lora_B == None else lora_B.to(torch.bfloat16)
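The rewritten preamble makes LoRA loading fail fast: `_loras_model_data` is only attached by `offload.all` (see the hunks at 2271/2304 below), so calling `load_loras_into_model` on a model that was never declared for LoRAs now raises immediately instead of half-building state. A minimal sketch of the new behaviour, assuming mmgp 3.3.4 is installed and that `offload` exposes these module-level functions as they are used elsewhere in this file:

    import torch
    from mmgp import offload  # assumes mmgp 3.3.4 is installed

    model = torch.nn.Linear(8, 8)  # stand-in for a real transformer

    try:
        # No offload object ever declared LoRAs for `model`, so 3.3.4 fails
        # fast, before even opening the (placeholder) file:
        offload.load_loras_into_model(model, "my_lora.safetensors")
    except Exception as e:
        print(f"LoRA loading refused: {e}")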
@@ -1108,12 +1104,17 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
     return new_lora_path
 
 def unload_loras_from_model(model):
-    model._loras_model_data
+    for _, v in model._loras_model_data.items():
+        v.clear()
+
+    model._loras_active_adapters = set()
+    model._loras_scaling = dict()
+    model._loras_tied_weights = dict()
     model._loras_errors = None
     model._loras_adapters = None
-    model._loras_active_adapters = None
     model._loras_scaling = None
 
+
 def set_step_no_for_lora(model, step_no):
     model._lora_step_no = step_no
 
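`unload_loras_from_model` now empties each per-module adapter dict in place rather than discarding the top-level container. That matters because the Linear hooks below close over those exact dict objects. A self-contained toy (plain Python, not mmgp API) showing why clearing in place is the right move:

    # Toy model of the pattern: each forward hook closes over one per-module dict.
    loras_model_data = {}

    def make_hook(key):
        loras_data = {}
        loras_model_data[key] = loras_data
        def forward():
            # The closure reads the live contents of loras_data on every call.
            return "plain" if len(loras_data) == 0 else "lora"
        return forward

    fwd = make_hook("linear1")
    loras_model_data["linear1"]["adapter"] = 1.0
    assert fwd() == "lora"

    # unload_loras_from_model-style clearing: the closure sees the change...
    for v in loras_model_data.values():
        v.clear()
    assert fwd() == "plain"

    # ...whereas rebinding loras_model_data["linear1"] = {} would leave the
    # closure holding the old dict, and stale adapters would keep firing.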
@@ -1857,14 +1858,14 @@ class offload:
             return result
 
 
-    def hook_lora_linear(self, submodule, current_model, model_id, submodule_name):
+    def hook_lora_linear(self, submodule, current_model, model_id, loras_model_data, submodule_name):
         old_forward = submodule.forward
+
+        loras_data = {}
+        loras_model_data[submodule] = loras_data
+
         def lora_linear_forward(module, *args, **kwargs):
-
-            loras_data = None
-            if loras_model_data != None:
-                loras_data = loras_model_data.get(submodule, None)
-            if loras_data == None:
+            if len(loras_data) == 0:
                 return old_forward(*args, **kwargs)
             else:
                 return self._lora_linear_forward(current_model, submodule, loras_data, *args, **kwargs)
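The hook now registers a per-module `loras_data` dict once, at wrap time, so the wrapped forward performs a single cheap `len()` check per call instead of the old two-step dict lookup on the hot path. A standalone sketch of the same wrapping pattern in plain PyTorch; the adapter math is a hypothetical stand-in for `_lora_linear_forward`, whose body is not shown in this diff:

    import types
    import torch

    def hook_linear(submodule, loras_model_data):
        old_forward = submodule.forward

        # Register this module's (initially empty) adapter dict once, eagerly.
        loras_data = {}
        loras_model_data[submodule] = loras_data

        def lora_linear_forward(module, *args, **kwargs):
            # Hot path: one emptiness check per call.
            if len(loras_data) == 0:
                return old_forward(*args, **kwargs)
            # Hypothetical adapter application: add scaled low-rank updates.
            x = args[0]
            out = old_forward(*args, **kwargs)
            for lora_A, lora_B, scale in loras_data.values():
                out = out + scale * (x @ lora_A.t() @ lora_B.t())
            return out

        submodule.forward = types.MethodType(lora_linear_forward, submodule)

    loras_model_data = {}
    lin = torch.nn.Linear(4, 4)
    hook_linear(lin, loras_model_data)
    y = lin(torch.randn(2, 4))   # empty dict: falls through to original forward

    r = 2  # illustrative LoRA rank
    loras_model_data[lin]["demo"] = (torch.randn(r, 4), torch.randn(4, r), 0.5)
    y2 = lin(torch.randn(2, 4))  # non-empty dict: same call site takes the LoRA path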
@@ -2271,7 +2272,10 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
             current_budget = model_budgets[model_id]
             cur_blocks_prefix, prev_blocks_name, cur_blocks_name,cur_blocks_seq, is_mod_seq = None, None, None, -1, False
             self.loaded_blocks[model_id] = None
-            any_lora = loras !=None and model_id in loras
+            any_lora = loras !=None and model_id in loras
+            if any_lora:
+                loras_model_data = {}
+                current_model._loras_model_data = loras_model_data
             for submodule_name, submodule in current_model.named_modules():
                 # create a fake 'accelerate' parameter so that the _execution_device property returns always "cuda"
                 # (it is queried in many pipelines even if offloading is not properly implemented)
@@ -2304,7 +2308,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
 
             if hasattr(submodule, "forward"):
                 if any_lora and isinstance(submodule, torch.nn.Linear):
-                    submodule_method = self.hook_lora_linear(submodule, current_model, model_id, submodule_name)
+                    submodule_method = self.hook_lora_linear(submodule, current_model, model_id, loras_model_data, submodule_name)
                 else:
                     submodule_method = getattr(submodule, "forward")
                 if callable(submodule_method):
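Taken together: `all` creates one `loras_model_data` dict per LoRA-enabled model, stores it as `model._loras_model_data`, and hands it to every Linear hook, so declaring LoRAs at setup time is what later entitles a model to `load_loras_into_model`. A hedged end-to-end sketch; the `loras` keyword shape is inferred from `model_id in loras` above, and the models and file name are placeholders:

    import torch
    from mmgp import offload  # assumes mmgp 3.3.4

    # Toy stand-in; real callers pass their actual pipeline models.
    transformer = torch.nn.Sequential(torch.nn.Linear(8, 8))
    pipe = {"transformer": transformer}

    # Declaring the model id in `loras` is what makes `all` create
    # _loras_model_data and wrap each nn.Linear forward via hook_lora_linear.
    offload.all(pipe, loras=["transformer"])

    # Permitted now that the model was declared; the file name is a placeholder.
    offload.load_loras_into_model(transformer, ["style_lora.safetensors"], lora_multi=[1.0])

    # Unloading clears each per-module adapter dict in place, so every wrapped
    # forward silently falls back to the original one.
    offload.unload_loras_from_model(transformer)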
{mmgp-3.3.3 → mmgp-3.3.4/src/mmgp.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.3.3
+Version: 3.3.4
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
@@ -17,7 +17,7 @@ Dynamic: license-file
 
 
 <p align="center">
-<H2>Memory Management 3.3.3 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.3.4 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 