mmgp-3.2.7-py3-none-any.whl → mmgp-3.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- mmgp/offload.py +38 -36
- {mmgp-3.2.7.dist-info → mmgp-3.3.0.dist-info}/METADATA +4 -3
- mmgp-3.3.0.dist-info/RECORD +9 -0
- {mmgp-3.2.7.dist-info → mmgp-3.3.0.dist-info}/WHEEL +1 -1
- mmgp-3.2.7.dist-info/RECORD +0 -9
- {mmgp-3.2.7.dist-info → mmgp-3.3.0.dist-info/licenses}/LICENSE.md +0 -0
- {mmgp-3.2.7.dist-info → mmgp-3.3.0.dist-info}/top_level.txt +0 -0
mmgp/offload.py
CHANGED
@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.2.7 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.3.0 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -575,7 +575,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.7) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.8) by DeepBeepMeep ************{ENDC}{UNBOLD}")
 
 def _extract_num_from_str(num_in_str):
     size = len(num_in_str)
@@ -882,10 +882,11 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
         return source + CrLf + text
 
     def trunc(text, sz):
+        text = str(text)
         if len(text) < sz:
-            return
+            return text
         else:
-            return
+            return text[0:sz] + '...'
 
     if not isinstance(lora_path, list):
         lora_path = [lora_path]
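The `trunc` helper is the first functional change: as rendered in this view the old return statements are cut off, but the 3.3.0 version coerces its argument to `str` (so non-string values such as key lists can be passed directly) and returns either the full text or a shortened copy marked with an ellipsis. A minimal sketch of the new behavior, not the packaged code itself:

```python
def trunc(text, sz):
    text = str(text)               # new in 3.3.0: accept non-string inputs such as key lists
    if len(text) < sz:
        return text                # short enough: return unchanged
    else:
        return text[0:sz] + '...'  # otherwise cut at sz characters and mark the truncation

print(trunc(["lora_A", "lora_B"], 10))  # -> ['lora_A',...
```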
@@ -910,6 +911,9 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
         skip = False
         state_dict = safetensors2.torch_load_file(path)
 
+
+
+
         if preprocess_sd != None:
             state_dict = preprocess_sd(state_dict)
 
@@ -947,42 +951,34 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
         # tied_weights = _extract_tie_weights_from_sd(state_dict, path) # to do
 
         clean_up = False
-
-        if
+        first_key = next(iter(state_dict), None)
+        if first_key == None:
             msg = f"Empty Lora '{path}'"
             error_msg = append(error_msg, msg)
             fail = True
 
         if not fail:
-
-
-            if "alpha" in k:
-                alpha_value = state_dict.pop(k)
-                if not ( (torch.is_tensor(alpha_value) and torch.is_floating_point(alpha_value)) or isinstance(
-                    alpha_value, float
-                )):
-                    network_alphas[k] = torch.tensor( float(alpha_value.item() ) )
-
-            pos = keys[0].find(".")
-            prefix = keys[0][0:pos]
+            pos = first_key.find(".")
+            prefix = first_key[0:pos]
             if prefix not in ["diffusion_model", "transformer"]:
                 msg = f"No compatible weight was found in Lora file '{path}'. Please check that it is compatible with the Diffusers format."
                 error_msg = append(error_msg, msg)
                 fail = True
 
         if not fail:
+
             state_dict = { k[ len(prefix) + 1:]: v for k, v in state_dict.items() if k.startswith(prefix) }
-            rank = {}
             clean_up = True
 
-
-            # if "lora_B" in key:
-            #     rank[key] = val.shape[1]
+            keys = list(state_dict.keys())
 
-
-
-
-
+            lora_alphas = {}
+            for k in keys:
+                if "alpha" in k:
+                    alpha_value = state_dict.pop(k)
+                    if torch.is_tensor(alpha_value):
+                        alpha_value = float(alpha_value.item())
+                    lora_alphas[k] = alpha_value
 
         invalid_keys = []
         unexpected_keys = []
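The rewritten block above drops the old `network_alphas` handling and instead pops every `alpha` entry out of the state dict into a `lora_alphas` map, converting tensor values to plain floats. A standalone sketch of that pass, using a made-up state dict for illustration:

```python
import torch

# Hypothetical minimal Lora state dict; real files follow the same naming pattern.
state_dict = {
    "blocks.0.attn.lora_A.weight": torch.zeros(16, 64),
    "blocks.0.attn.lora_B.weight": torch.zeros(64, 16),
    "blocks.0.attn.alpha": torch.tensor(8.0),
}

lora_alphas = {}
for k in list(state_dict.keys()):
    if "alpha" in k:
        alpha_value = state_dict.pop(k)           # alpha entries are removed from the weights
        if torch.is_tensor(alpha_value):
            alpha_value = float(alpha_value.item())
        lora_alphas[k] = alpha_value              # kept separately, keyed by full name

print(lora_alphas)  # {'blocks.0.attn.alpha': 8.0}
```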
@@ -1037,14 +1033,18 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
             loras_module_data = dict()
             loras_model_data[module] = loras_module_data
         loras_adapter_data = loras_module_data.get(adapter_name, None)
+        lora_A = None if lora_A == None else lora_A.to(torch.bfloat16)
+        lora_B = None if lora_B == None else lora_B.to(torch.bfloat16)
         if loras_adapter_data == None:
-
+            alpha = lora_alphas.get(k[:-len("lora_X.weight")] + "alpha", 1.)
+            loras_adapter_data = [lora_A, lora_B, alpha]
             loras_module_data[adapter_name] = loras_adapter_data
         elif lora_A != None:
             loras_adapter_data[0] = lora_A
         else:
             loras_adapter_data[1] = lora_B
         lora_A, lora_B, v, loras_module_data, loras_adapter_data = None, None, None, None, None
+    lora_alphas = None
 
     if len(invalid_keys) > 0:
         msg = "Lora '{path}' contains non Lora keys '{trunc(invalid_keys,200)}'"
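Two things change in this hunk: the A/B matrices are cast to `bfloat16` before being stored, and each adapter entry becomes a `[lora_A, lora_B, alpha]` triple instead of a pair. The alpha lookup works because `"lora_A.weight"` and `"lora_B.weight"` have the same length as the `"lora_X.weight"` placeholder, so stripping that many characters from either key yields the module path. A small sketch of the key derivation (the key name is illustrative):

```python
k = "blocks.0.attn.lora_A.weight"
alpha_key = k[:-len("lora_X.weight")] + "alpha"  # "lora_X.weight" is a length-13 placeholder
print(alpha_key)                                  # -> blocks.0.attn.alpha

lora_alphas = {"blocks.0.attn.alpha": 8.0}
alpha = lora_alphas.get(alpha_key, 1.)            # missing alpha falls back to 1.0 (no extra scaling)
```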
@@ -1409,7 +1409,9 @@ def extract_models(obj = None, prefix = None):
     elif prefix[ -1:] != "/":
         prefix + "/"
 
-    for name in dir(obj):
+    for name in dir(obj):
+        if name in ["_execution_device"]:
+            continue
         element = getattr(obj,name)
         if name in ("pipeline", "pipe"):
             pipeline = element
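`extract_models` walks `dir(obj)` and calls `getattr` on every attribute; 3.3.0 now skips `_execution_device`. A plausible reason (an assumption, not stated in the diff) is that on diffusers pipelines `_execution_device` is a property whose getter runs device-placement logic that should not fire during passive introspection. A sketch of the guarded walk:

```python
def iter_attributes(obj):
    # Hypothetical helper mirroring the loop above, not mmgp's actual code.
    for name in dir(obj):
        if name in ["_execution_device"]:
            continue                      # avoid triggering property getters with side effects
        yield name, getattr(obj, name)
```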
@@ -1548,10 +1550,10 @@ class offload:
             lora_data = lora_module.get(adapter, None)
             if lora_data == None:
                 continue
-            lora_A, lora_B = lora_data
+            lora_A, lora_B, alpha = lora_data
             key = adapter + '_GPU'
             if to_GPU:
-                lora_module[key] = [lora_A.cuda(), lora_B.cuda()]
+                lora_module[key] = [lora_A.cuda(non_blocking=True), lora_B.cuda(non_blocking=True), alpha]
             elif key in lora_module:
                 del lora_module[key]
 
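Moving Lora weights to the GPU now uses `non_blocking=True`, which queues the host-to-device copy asynchronously on the current CUDA stream; the copy only truly overlaps with compute when the source tensor sits in pinned (page-locked) host memory, which mmgp's pinning options can arrange. A sketch of the pattern (assumes a CUDA device is available):

```python
import torch

lora_A = torch.zeros(16, 4096).pin_memory()  # page-locked memory enables async copies
lora_B = torch.zeros(4096, 16).pin_memory()

lora_A_gpu = lora_A.cuda(non_blocking=True)  # enqueued on the current stream, returns immediately
lora_B_gpu = lora_B.cuda(non_blocking=True)

torch.cuda.synchronize()                     # only to make this standalone example deterministic
```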
@@ -1595,8 +1597,8 @@ class offload:
             lora_data = loras_model_data.get(parent_module, None)
             if lora_data != None:
                 loras_modules[parent_module]= lora_data
-
-
+        if len(loras_modules) > 0:
+            self._move_loras(loras_active_adapters, loras_modules, True)
 
         loaded_block = self.loaded_blocks[model_id]
 
@@ -1801,8 +1803,8 @@ class offload:
             data = loras_data.get(active_adapter + '_GPU', None)
             if data == None:
                 continue
-            lora_A_weight, lora_B_weight = data
-            scaling = get_scaling(active_adapter)
+            lora_A_weight, lora_B_weight, alpha = data
+            scaling = get_scaling(active_adapter) * alpha
             weight.addmm_(lora_B_weight, lora_A_weight, alpha= scaling )
             # base_weight += scaling * lora_B_weight @ lora_A_weight
 
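With the stored alpha folded into the scaling, the in-place merge computes `W += (scaling * alpha) * (B @ A)`, since `Tensor.addmm_(mat1, mat2, alpha=s)` adds `s * (mat1 @ mat2)` to the tensor it is called on. A quick numeric check of that identity:

```python
import torch

out_f, in_f, r = 8, 8, 2
weight = torch.randn(out_f, in_f)
lora_A = torch.randn(r, in_f)    # A: (rank, in_features)
lora_B = torch.randn(out_f, r)   # B: (out_features, rank)
scaling, alpha = 0.5, 8.0        # alpha now multiplies the adapter's own scaling

expected = weight + (scaling * alpha) * (lora_B @ lora_A)
weight.addmm_(lora_B, lora_A, alpha=scaling * alpha)
print(torch.allclose(weight, expected))  # True
```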
@@ -1822,9 +1824,9 @@ class offload:
             data = loras_data.get(active_adapter + '_GPU', None)
             if data == None:
                 continue
-            lora_A, lora_B = data
+            lora_A, lora_B, alpha = data
             # dropout = self.lora_dropout[active_adapter]
-            scaling = get_scaling(active_adapter)
+            scaling = get_scaling(active_adapter) * alpha
             x = x.to(lora_A.dtype)
 
             if training:
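The non-merged path gets the same treatment: the low-rank update is computed on the activations at forward time and scaled by `get_scaling(adapter) * alpha`. A rough sketch of that computation (simplified; the real hook also handles dropout and training mode):

```python
import torch

x = torch.randn(1, 8)              # incoming activations
lora_A = torch.randn(2, 8)         # (rank, in_features)
lora_B = torch.randn(8, 2)         # (out_features, rank)
scaling = 0.5 * 8.0                # get_scaling(active_adapter) * alpha

x = x.to(lora_A.dtype)
delta = (x @ lora_A.T) @ lora_B.T  # x A^T B^T, the low-rank correction
y_lora = delta * scaling           # added to the base layer's output
```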
@@ -2248,7 +2250,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, l
                 print(f"Model '{model_id}' already pinned to reserved memory")
             else:
                 _pin_to_memory(current_model, model_id, partialPinning= partialPinning, pinnedPEFTLora = pinnedPEFTLora, verboseLevel=verboseLevel)
-
+
         current_budget = model_budgets[model_id]
         cur_blocks_prefix, prev_blocks_name, cur_blocks_name,cur_blocks_seq, is_mod_seq = None, None, None, -1, False
         self.loaded_blocks[model_id] = None
{mmgp-3.2.7.dist-info → mmgp-3.3.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: mmgp
-Version: 3.2.7
+Version: 3.3.0
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
@@ -13,10 +13,11 @@ Requires-Dist: optimum-quanto
 Requires-Dist: accelerate
 Requires-Dist: safetensors
 Requires-Dist: psutil
+Dynamic: license-file
 
 
 <p align="center">
-    <H2>Memory Management 3.2.7 for the GPU Poor by DeepBeepMeep</H2>
+    <H2>Memory Management 3.3.0 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
mmgp-3.3.0.dist-info/RECORD
ADDED

@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=xdlYbB8nKUywAAMPcfCzJmCxYHvBB5vcZgv2wEQTtbE,105329
+mmgp/safetensors2.py,sha256=DCdlRH3769CTyraAmWAB3b0XrVua7z6ygQ-OyKgJN6A,16453
+mmgp-3.3.0.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.3.0.dist-info/METADATA,sha256=33eB_YmC6PciTkzi_Z_gsWWzoz6RJgyLbEItFatVghk,16153
+mmgp-3.3.0.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
+mmgp-3.3.0.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.3.0.dist-info/RECORD,,
mmgp-3.2.7.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=6qJrxM3EPqUHC04njZetVY2sr2x9DQwh13CZIM5oLIA,105417
-mmgp/safetensors2.py,sha256=DCdlRH3769CTyraAmWAB3b0XrVua7z6ygQ-OyKgJN6A,16453
-mmgp-3.2.7.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-3.2.7.dist-info/METADATA,sha256=zu_MxYB3j6sYNqQShyKnNwJkv0_j-fO6qOHoO8PUUfY,16131
-mmgp-3.2.7.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
-mmgp-3.2.7.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.2.7.dist-info/RECORD,,
{mmgp-3.2.7.dist-info → mmgp-3.3.0.dist-info/licenses}/LICENSE.md
File without changes

{mmgp-3.2.7.dist-info → mmgp-3.3.0.dist-info}/top_level.txt
File without changes