mmgp 3.4.7-py3-none-any.whl → 3.4.9-py3-none-any.whl
- mmgp/offload.py +33 -26
- {mmgp-3.4.7.dist-info → mmgp-3.4.9.dist-info}/METADATA +2 -2
- mmgp-3.4.9.dist-info/RECORD +9 -0
- {mmgp-3.4.7.dist-info → mmgp-3.4.9.dist-info}/WHEEL +1 -1
- mmgp-3.4.7.dist-info/RECORD +0 -9
- {mmgp-3.4.7.dist-info → mmgp-3.4.9.dist-info}/licenses/LICENSE.md +0 -0
- {mmgp-3.4.7.dist-info → mmgp-3.4.9.dist-info}/top_level.txt +0 -0
mmgp/offload.py CHANGED
@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.4.7 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.4.9 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -448,9 +448,9 @@ def _pin_sd_to_memory(sd, sd_name, tied_weights = None, gig_tensor_size = BIG_TE
         print(f"'{','.join(names_list)}' was partially pinned to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
     else:
         if len(names_list) > 0:
-            print(f"'{','.join(names_list)}' was pinned entirely to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
-        else:
             print(f"'{','.join(names_list)}' were pinned entirely to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
+        else:
+            print(f"'{','.join(names_list)}' was pinned entirely to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
 
 
     return
@@ -658,7 +658,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.4.7) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.4.9) by DeepBeepMeep ************{ENDC}{UNBOLD}")
 
 def change_dtype(model, new_dtype, exclude_buffers = False):
     for submodule_name, submodule in model.named_modules():
@@ -1226,7 +1226,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
 
     model._loras_errors = errors
     if not check_only:
-        if pinnedLora:
+        if pinnedLora and len(pinned_sd_list) > 0:
             _pin_sd_to_memory(pinned_sd_list, pinned_names_list)
     model._loras_adapters = adapters
     if activate_all_loras:
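A note on the guard above: _pin_sd_to_memory reserves page-locked (pinned) host RAM for whatever it is handed, so 3.4.9 only calls it when at least one LoRA state dict was actually collected. Pinned RAM is what makes the later CPU-to-GPU transfers fast and overlappable; a minimal PyTorch sketch of the mechanism (tensor and shapes hypothetical, unrelated to mmgp internals):

    import torch

    # Page-locked host memory cannot be swapped out, so CUDA can DMA from it,
    # and H2D copies may overlap with compute when non_blocking=True.
    cpu_weight = torch.randn(4096, 4096).pin_memory()
    if torch.cuda.is_available():
        gpu_weight = cpu_weight.to("cuda", non_blocking=True)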
@@ -1275,7 +1275,7 @@ def move_loras_to_device(model, device="cpu" ):
         if ".lora_" in k:
             m.to(device)
 
-def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, modelClass=None, modelPrefix = None, writable_tensors = True, verboseLevel = -1, configKwargs ={}):
+def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, defaultConfigPath = None, modelClass=None, modelPrefix = None, writable_tensors = True, verboseLevel = -1, configKwargs ={}):
     """
     quick version of .LoadfromPretrained of the transformers library
     used to build a model and load the corresponding weights (quantized or not)
@@ -1308,7 +1308,7 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat
     if forcedConfigPath != None:
         config_fullpath = forcedConfigPath
     else:
-        config_fullpath = os.path.join(os.path.dirname(model_path[-1]), "config.json")
+        config_fullpath = os.path.join(os.path.dirname(model_path[-1]), "config.json") if defaultConfigPath == None else defaultConfigPath
 
     if not os.path.isfile(config_fullpath):
         raise Exception("a 'config.json' that describes the model is required in the directory of the model or inside the safetensor file")
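About the new parameter: forcedConfigPath still wins outright, while defaultConfigPath, when given, replaces the default of reading the config.json that sits next to the weights file. A hypothetical call (paths invented for illustration):

    from mmgp import offload

    # Keyword names follow the diffed signature; the paths are made up.
    text_encoder = offload.fast_load_transformers_model(
        "ckpts/text_encoder/model.safetensors",
        do_quantize=True,
        defaultConfigPath="configs/text_encoder.json",  # read instead of the adjacent config.json
    )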
@@ -1407,14 +1407,14 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
     if not (".safetensors" in file or ".sft" in file):
         if pinToMemory:
             raise Exception("Pinning to memory while loading only supported for safe tensors files")
-        state_dict = torch.load(file, weights_only=True)
+        state_dict = torch.load(file, weights_only=True, map_location="cpu")
         if "module" in state_dict:
             state_dict = state_dict["module"]
 
     else:
         basename = os.path.basename(file)
 
-        if "
+        if "-of-" in basename:
             metadata = None
             file_parts= basename.split("-")
             parts_max = int(file_parts[-1][:5])
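The added map_location="cpu" is the standard guard for checkpoints that were saved from CUDA tensors: without it, torch.load restores them onto the device recorded in the file, claiming GPU memory before the offloader can intervene, and failing outright on a CPU-only machine. A standalone sketch (file name hypothetical):

    import torch

    # weights_only=True restricts unpickling to tensor payloads;
    # map_location="cpu" overrides whatever device was recorded at save time.
    state_dict = torch.load("checkpoint.pt", weights_only=True, map_location="cpu")
    if "module" in state_dict:  # unwrap a DDP/DeepSpeed-style container
        state_dict = state_dict["module"]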
@@ -1500,11 +1500,11 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
 
     if do_quantize:
         if quantization_map != None and len(quantization_map) > 0 :
-            if _quantize(model, quantizationType, verboseLevel=verboseLevel, model_id=file_path):
-                quantization_map = model._quanto_map
-        else:
             if verboseLevel >=1:
                 print("Model already quantized")
+        else:
+            if _quantize(model, quantizationType, verboseLevel=verboseLevel, model_id=file_path):
+                quantization_map = model._quanto_map
 
     if pinToMemory:
         _pin_to_memory(model, file_path, partialPinning = partialPinning, verboseLevel = verboseLevel)
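The 3.4.7 branches here were inverted: a non-empty quantization_map means the checkpoint already carries quantized weights, yet the old code re-ran _quantize in exactly that case and printed "Model already quantized" when no map existed. A runnable restatement of the corrected decision order, with stand-ins for mmgp's internals:

    def maybe_quantize(model, quantization_map, do_quantize, quantize_fn, verbose_level=1):
        # Corrected order: a non-empty map means "already quantized", so only
        # report; otherwise run the quantizer and pick up the map it leaves behind.
        if do_quantize:
            if quantization_map:
                if verbose_level >= 1:
                    print("Model already quantized")
            elif quantize_fn(model):
                quantization_map = getattr(model, "_quanto_map", {})
        return quantization_map

    class _Model:  # stand-in for a real model object
        pass

    m = _Model()
    m._quanto_map = {"linear.weight": "qint8"}
    print(maybe_quantize(m, {}, True, lambda model: True))              # quantizes
    print(maybe_quantize(m, m._quanto_map, True, lambda model: True))   # only reports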
@@ -1621,9 +1621,12 @@ class HfHook:
     def __init__(self):
         self.execution_device = "cuda"
 
-    def
-
+    def init_hook(self, module):
+        return module
 
+    def detach_hook(self, module):
+        return module
+
 last_offload_obj = None
 class offload:
     def __init__(self):
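The two added no-op methods fill out the hook interface (init_hook/detach_hook) that accelerate-style hook management expects to call on attach and detach; mmgp handles placement itself, so identity implementations suffice. A self-contained sketch, assuming that calling convention:

    import torch

    class HfHook:  # minimal stand-in mirroring the diffed class
        def __init__(self):
            self.execution_device = "cuda"

        def init_hook(self, module):
            return module  # attach-time setup: nothing to do

        def detach_hook(self, module):
            return module  # detach-time teardown: nothing to do

    layer = torch.nn.Linear(8, 8)
    hook = HfHook()
    layer = hook.init_hook(layer)    # callers expect the (possibly wrapped) module back
    layer = hook.detach_hook(layer)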
@@ -2028,7 +2031,9 @@ class offload:
                 continue
             lora_A_weight, lora_B_weight, diff_b, alpha = data
             scaling = self._get_lora_scaling(loras_scaling, model, active_adapter) * alpha
-
+            if lora_A_weight != None:
+                weight.addmm_(lora_B_weight, lora_A_weight, alpha= scaling )
+
             if diff_b != None:
                 if bias == None:
                     bias = diff_b.clone()
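The new merge path folds a LoRA pair straight into the base weight in place: Tensor.addmm_(mat1, mat2, alpha=s) computes self += s * (mat1 @ mat2) with the default beta of 1, i.e. W += scaling * (B @ A), and the None check lets bias-only adapters (diff_b without A/B matrices) fall through to the bias handling below. The identity, checked standalone:

    import torch

    out_features, in_features, rank = 64, 32, 4
    W = torch.randn(out_features, in_features)   # base weight
    B = torch.randn(out_features, rank)          # lora_B_weight
    A = torch.randn(rank, in_features)           # lora_A_weight
    scaling = 0.5

    expected = W + scaling * (B @ A)
    W.addmm_(B, A, alpha=scaling)                # in place: W += scaling * (B @ A)
    assert torch.allclose(W, expected, atol=1e-5)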
@@ -2059,17 +2064,20 @@ class offload:
             lora_A, lora_B, diff_b, alpha = data
             # dropout = self.lora_dropout[active_adapter]
             scaling = self._get_lora_scaling(loras_scaling, model, active_adapter) * alpha
-
-
-            if training:
-                pass
-                # y = lora_A(dropout(x))
+            if lora_A == None:
+                result.add_(diff_b, alpha=scaling)
             else:
-
-
-
-
-
+                x = x.to(lora_A.dtype)
+
+                if training:
+                    pass
+                    # y = lora_A(dropout(x))
+                else:
+                    y = torch.nn.functional.linear(x, lora_A, bias=None)
+                    y = torch.nn.functional.linear(y, lora_B, bias=diff_b)
+                    y*= scaling
+                    result+= y
+                    del y
 
         return result
 
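The rebuilt inference branch applies an unmerged adapter as two thin matmuls through the rank bottleneck instead of materializing the full (out, in) delta; diff_b rides along as the bias of the second linear, so it is scaled together with the low-rank term, matching the bias-only path (result.add_(diff_b, alpha=scaling)) when lora_A is None. The algebra, checked standalone (shapes hypothetical):

    import torch
    import torch.nn.functional as F

    batch, in_features, out_features, rank = 2, 32, 64, 4
    x = torch.randn(batch, in_features)
    lora_A = torch.randn(rank, in_features)
    lora_B = torch.randn(out_features, rank)
    diff_b = torch.randn(out_features)
    scaling = 0.5

    y = F.linear(x, lora_A)                   # (batch, rank): project down
    y = F.linear(y, lora_B, bias=diff_b)      # (batch, out): project up, add bias diff
    y *= scaling                              # scales low-rank term and bias diff together

    ref = scaling * (x @ lora_A.T @ lora_B.T + diff_b)
    assert torch.allclose(y, ref, atol=1e-5)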
@@ -2405,7 +2413,6 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
         model_dtype = getattr(current_model, "_model_dtype", None)
         # if model_dtype == None:
         #     model_dtype = getattr(current_model, "dtype", None)
-
         for _ , m in current_model.named_modules():
             ignore_dtype = hasattr(m, "_lock_dtype")
             for n, p in m.named_parameters(recurse = False):
{mmgp-3.4.7.dist-info → mmgp-3.4.9.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.4.7
+Version: 3.4.9
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file
 
 
 <p align="center">
-<H2>Memory Management 3.4.7 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.4.9 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
mmgp-3.4.9.dist-info/RECORD ADDED
@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=oIicu2S5E_lBlA3wqi5RW9UIQe9vCKCka_wkTXJwlUg,121549
+mmgp/safetensors2.py,sha256=4nKV13qCMabnNEB1TA_ueFbfGYYmiQ9racR_C6SsGug,18693
+mmgp-3.4.9.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.4.9.dist-info/METADATA,sha256=XwnMyOWZbXeKOyaTjOOMf_6j7jcOFs46P0b4tKv7BlU,16309
+mmgp-3.4.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mmgp-3.4.9.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.4.9.dist-info/RECORD,,
mmgp-3.4.7.dist-info/RECORD DELETED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=wf5u4qaGKYm6OTyGg4TXCa3aA0h3nuhnml7qOzn6JOY,121124
-mmgp/safetensors2.py,sha256=4nKV13qCMabnNEB1TA_ueFbfGYYmiQ9racR_C6SsGug,18693
-mmgp-3.4.7.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-3.4.7.dist-info/METADATA,sha256=DztABKlGwAcKYogKuTzsOWs0he3elTFZXAkWpH4yIEU,16309
-mmgp-3.4.7.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
-mmgp-3.4.7.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.4.7.dist-info/RECORD,,
{mmgp-3.4.7.dist-info → mmgp-3.4.9.dist-info}/licenses/LICENSE.md: File without changes
{mmgp-3.4.7.dist-info → mmgp-3.4.9.dist-info}/top_level.txt: File without changes