mmgp 3.2.3-py3-none-any.whl → 3.2.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mmgp might be problematic.
- mmgp/offload.py +41 -33
- {mmgp-3.2.3.dist-info → mmgp-3.2.4.dist-info}/METADATA +2 -2
- mmgp-3.2.4.dist-info/RECORD +9 -0
- mmgp-3.2.3.dist-info/RECORD +0 -9
- {mmgp-3.2.3.dist-info → mmgp-3.2.4.dist-info}/LICENSE.md +0 -0
- {mmgp-3.2.3.dist-info → mmgp-3.2.4.dist-info}/WHEEL +0 -0
- {mmgp-3.2.3.dist-info → mmgp-3.2.4.dist-info}/top_level.txt +0 -0
mmgp/offload.py
CHANGED
@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.2.3 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.2.4 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -479,7 +479,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.3) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.4) by DeepBeepMeep ************{ENDC}{UNBOLD}")

 def _extract_num_from_str(num_in_str):
     size = len(num_in_str)
@@ -858,7 +858,7 @@ def _lora_linear_forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor
         result = result.to(torch_result_dtype)
     return result

-def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_loras = True, split_linear_modules_map = None,verboseLevel = -1,):
+def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_loras = True, split_linear_modules_map = None, preprocess_sd = None, verboseLevel = -1,):
     verboseLevel = _compute_verbose_level(verboseLevel)

     if inject_adapter_in_model == None or set_weights_and_activate_adapters == None or get_peft_kwargs == None:
@@ -877,7 +877,8 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
         adapter_name = str(i)

         state_dict = safetensors2.torch_load_file(path)
-
+        if preprocess_sd != None:
+            state_dict = preprocess_sd(state_dict)

         if split_linear_modules_map != None:
             new_state_dict = {}
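The new preprocess_sd hook runs on the raw LoRA state dict right after it is read from disk, before any module splitting or injection. A minimal usage sketch, assuming an already-built torch module; the list form of lora_path, the file name, and the prefix-stripping transform are illustrative assumptions, not part of this release:

    from mmgp import offload

    def strip_prefix(state_dict):
        # Hypothetical transform: drop a "diffusion_model." key prefix
        # from every entry (str.removeprefix needs Python 3.9+).
        return {k.removeprefix("diffusion_model."): v for k, v in state_dict.items()}

    offload.load_loras_into_model(
        model,                       # a torch.nn.Module built elsewhere
        ["my_lora.safetensors"],     # illustrative path list
        preprocess_sd=strip_prefix,
    )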
@@ -977,7 +978,8 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
     # Check only for unexpected keys.
     unexpected_keys = getattr(incompatible_keys, "unexpected_keys", None)
     if unexpected_keys:
-
+        raise Exception(f"Lora '{path}' contains invalid keys '{unexpected_keys}'")
+
     if verboseLevel >=1:
         print(f"Lora '{path}' was loaded in model '{_get_module_name(model)}'")
     if activate_all_loras:
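Where 3.2.3 tolerated unexpected keys, 3.2.4 aborts the load. A caller probing a LoRA file of uncertain layout might now guard the call; a hedged sketch, not an API prescribed by the library:

    from mmgp import offload

    try:
        offload.load_loras_into_model(model, ["my_lora.safetensors"])  # illustrative
    except Exception as exc:
        # 3.2.4 raises as soon as the checkpoint contains keys the model
        # does not expect, instead of activating a partial adapter.
        print(f"LoRA rejected: {exc}")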
@@ -1015,7 +1017,7 @@ def move_loras_to_device(model, device="cpu" ):
         if ".lora_" in k:
             m.to(device)

-def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, modelClass=None, verboseLevel = -1):
+def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, modelClass=None, modelPrefix = None, verboseLevel = -1):
     """
     quick version of .LoadfromPretrained of the transformers library
     used to build a model and load the corresponding weights (quantized or not)
@@ -1096,13 +1098,13 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat

     model._config = transformer_config

-    load_model_data(model,model_path, do_quantize = do_quantize, quantizationType = quantizationType, pinToMemory= pinToMemory, partialPinning= partialPinning, verboseLevel=verboseLevel )
+    load_model_data(model,model_path, do_quantize = do_quantize, quantizationType = quantizationType, pinToMemory= pinToMemory, partialPinning= partialPinning, modelPrefix = modelPrefix, verboseLevel=verboseLevel )

     return model



-def load_model_data(model, file_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, verboseLevel = -1):
+def load_model_data(model, file_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, modelPrefix = None, verboseLevel = -1):
     """
     Load a model, detect if it has been previously quantized using quanto and do the extra setup if necessary
     """
@@ -1113,6 +1115,26 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
     verboseLevel = _compute_verbose_level(verboseLevel)

     model = _remove_model_wrapper(model)
+
+    def filter_state_dict(state_dict, base_model_prefix):
+        new_state_dict= {}
+        start = -1
+        for k,v in state_dict.items():
+            if k.startswith(base_model_prefix):
+
+                new_start = len(base_model_prefix)
+            else:
+                pos = k.find("." + base_model_prefix)
+                if pos < 0:
+                    continue
+                new_start = pos + len(base_model_prefix) +1
+            if start != -1 and start != new_start:
+                new_state_dict = state_dict
+                break
+            start = new_start
+            new_state_dict[k[ start:]] = v
+        return new_state_dict
+
     if not (".safetensors" in file_path or ".sft" in file_path):
         if pinToMemory:
             raise Exception("Pinning to memory while loading only supported for safe tensors files")
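The nested filter_state_dict helper strips a shared prefix from every key, whether the key starts with the prefix or contains it after an outer wrapper name; if the prefix sits at inconsistent positions across keys it gives up and returns the dict unchanged. A standalone illustration of the intended rewriting, with dummy values and behavior inferred from the hunk above:

    sd = {"model.blocks.0.weight": 0, "model.blocks.0.bias": 0}
    # filter_state_dict(sd, "model.") -> {"blocks.0.weight": 0, "blocks.0.bias": 0}

    wrapped = {"pipe.model.blocks.0.weight": 0}
    # k.find(".model.") == 4, so new_start = 4 + len("model.") + 1 == 11
    # filter_state_dict(wrapped, "model.") -> {"blocks.0.weight": 0}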
@@ -1151,6 +1173,11 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
             quantization_map = json.load(f)


+    # deal if we are trying to load just a sub part of a larger model
+    if modelPrefix != None:
+        base_model_prefix = modelPrefix + "."
+        state_dict = filter_state_dict(state_dict,base_model_prefix)
+        quantization_map = filter_state_dict(quantization_map,base_model_prefix)

     if quantization_map is None :
         if "quanto" in file_path and not do_quantize:
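With modelPrefix set, both the weights and the quanto quantization map are narrowed to one sub-model before loading, via the same filter_state_dict helper. A hedged sketch of pulling a single component out of a combined checkpoint; the file name and prefix are assumptions for illustration (the trailing dot is appended internally, so pass the bare prefix):

    from mmgp.offload import fast_load_transformers_model

    # Checkpoint assumed to hold several components, with the text encoder's
    # tensors keyed under "text_encoder."; modelPrefix selects that subtree.
    text_encoder = fast_load_transformers_model(
        "combined_pipeline_quanto_int8.safetensors",  # illustrative file
        modelPrefix="text_encoder",
        verboseLevel=1,
    )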
@@ -1160,32 +1187,12 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType

     missing_keys , unexpected_keys = model.load_state_dict(state_dict, False, assign = True )
     if len(missing_keys) > 0 :
-        # if there is a key mismatch maybe we forgot to remove some prefix
-        if hasattr(model, "base_model_prefix"):
-            base_model_prefix = model.base_model_prefix + "."
-        else:
-            for k,v in state_dict.items():
-                if k.endswith(missing_keys[0]):
-                    base_model_prefix = k[:-len(missing_keys[0])]
-                    break
-
-        new_state_dict= {}
-        start = -1
+        # if there is a key mismatch maybe we forgot to remove some prefix
         for k,v in state_dict.items():
-            if k.startswith(base_model_prefix):
-
-                new_start = len(base_model_prefix)
-            else:
-                pos = k.find("." + base_model_prefix)
-                if pos < 0:
-                    continue
-                new_start = pos + len(base_model_prefix) +1
-            if start != -1 and start != new_start:
-                new_state_dict = state_dict
+            if k.endswith(missing_keys[0]):
+                base_model_prefix = k[:-len(missing_keys[0])]
                 break
-
-            new_state_dict[k[ start:]] = v
-        state_dict = new_state_dict
-        del new_state_dict
+        state_dict = filter_state_dict(state_dict,base_model_prefix)
         missing_keys , unexpected_keys = model.load_state_dict(state_dict, False, assign = True )
         del state_dict

@@ -1354,6 +1361,8 @@ class offload:

     def add_module_to_blocks(self, model_id, blocks_name, submodule, prev_block_name, submodule_name):

+        if blocks_name!=None and ".lora_" in blocks_name:
+            blocks_name = None
         entry_name = model_id if blocks_name is None else model_id + "/" + blocks_name
         if entry_name in self.blocks_of_modules:
             blocks_params = self.blocks_of_modules[entry_name]
@@ -1372,7 +1381,6 @@ class offload:
         lora_name = None
         if self.lora_parents.get(submodule, None) != None:
             lora_name = str(submodule_name[ submodule_name.rfind(".") + 1: ] )
-
         for k,p in submodule.named_parameters(recurse=False):
             param_size = 0
             ref = _get_tensor_ref(p)
{mmgp-3.2.3.dist-info → mmgp-3.2.4.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mmgp
-Version: 3.2.3
+Version: 3.2.4
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
@@ -17,7 +17,7 @@ Requires-Dist: peft


 <p align="center">
-    <H2>Memory Management 3.2.3 for the GPU Poor by DeepBeepMeep</H2>
+    <H2>Memory Management 3.2.4 for the GPU Poor by DeepBeepMeep</H2>
 </p>

mmgp-3.2.4.dist-info/RECORD
ADDED
@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=vGxgCcWV8PQQ4JjSlYFOX57Mr9RLlvPBMOOj3f63qL4,96389
+mmgp/safetensors2.py,sha256=DCdlRH3769CTyraAmWAB3b0XrVua7z6ygQ-OyKgJN6A,16453
+mmgp-3.2.4.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.2.4.dist-info/METADATA,sha256=UGZ7ADvrhU5P0hS7gFgu8SHpEnzzpEgE3Ionk-I7ckw,16151
+mmgp-3.2.4.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+mmgp-3.2.4.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.2.4.dist-info/RECORD,,
mmgp-3.2.3.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=EeC-eSJLq8Z8K1wq7UGCzDpaW7JAL-RSFVr8fPUmtPc,95853
-mmgp/safetensors2.py,sha256=DCdlRH3769CTyraAmWAB3b0XrVua7z6ygQ-OyKgJN6A,16453
-mmgp-3.2.3.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-3.2.3.dist-info/METADATA,sha256=9Z2SIaf6fBdZDuIn8Pqqr93qXZS_tiRLU9KbKMDSuSM,16151
-mmgp-3.2.3.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
-mmgp-3.2.3.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.2.3.dist-info/RECORD,,
{mmgp-3.2.3.dist-info → mmgp-3.2.4.dist-info}/LICENSE.md
File without changes
{mmgp-3.2.3.dist-info → mmgp-3.2.4.dist-info}/WHEEL
File without changes
{mmgp-3.2.3.dist-info → mmgp-3.2.4.dist-info}/top_level.txt
File without changes