mmgp: 3.5.6-py3-none-any.whl → 3.5.7-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- mmgp/offload.py +28 -22
- {mmgp-3.5.6.dist-info → mmgp-3.5.7.dist-info}/METADATA +2 -2
- mmgp-3.5.7.dist-info/RECORD +9 -0
- mmgp-3.5.6.dist-info/RECORD +0 -9
- {mmgp-3.5.6.dist-info → mmgp-3.5.7.dist-info}/WHEEL +0 -0
- {mmgp-3.5.6.dist-info → mmgp-3.5.7.dist-info}/licenses/LICENSE.md +0 -0
- {mmgp-3.5.6.dist-info → mmgp-3.5.7.dist-info}/top_level.txt +0 -0
mmgp/offload.py
CHANGED

@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.5.6 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.5.7 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several

@@ -122,8 +122,6 @@ class clock:
     def format_time_gap(self):
         return f"{self.stop_time - self.start_time:.2f}s"
 
-
-
 # useful functions to move a group of tensors (to design custom offload patches)
 def move_tensors(obj, device):
     if torch.is_tensor(obj):

@@ -668,7 +666,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.6) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.7) by DeepBeepMeep ************{ENDC}{UNBOLD}")
 
 def change_dtype(model, new_dtype, exclude_buffers = False):
     for submodule_name, submodule in model.named_modules():

@@ -1295,7 +1293,7 @@ def move_loras_to_device(model, device="cpu" ):
         if ".lora_" in k:
             m.to(device)
 
-def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, defaultConfigPath = None, modelClass=None, modelPrefix = None, writable_tensors = True, verboseLevel = -1, modules = None, return_shared_modules = None, configKwargs ={}):
+def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, defaultConfigPath = None, modelClass=None, modelPrefix = None, writable_tensors = True, verboseLevel = -1, preprocess_sd = None, modules = None, return_shared_modules = None, configKwargs ={}):
     """
     quick version of .LoadfromPretrained of the transformers library
     used to build a model and load the corresponding weights (quantized or not)

@@ -1383,13 +1381,13 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantiza
 
     model._config = transformer_config
 
-    load_model_data(model,model_path, do_quantize = do_quantize, quantizationType = quantizationType, pinToMemory= pinToMemory, partialPinning= partialPinning, modelPrefix = modelPrefix, writable_tensors =writable_tensors, modules = modules, return_shared_modules = return_shared_modules, verboseLevel=verboseLevel )
+    load_model_data(model,model_path, do_quantize = do_quantize, quantizationType = quantizationType, pinToMemory= pinToMemory, partialPinning= partialPinning, modelPrefix = modelPrefix, writable_tensors =writable_tensors, preprocess_sd = preprocess_sd , modules = modules, return_shared_modules = return_shared_modules, verboseLevel=verboseLevel )
 
     return model
 
 
 
-def load_model_data(model, file_path, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, modelPrefix = None, writable_tensors = True, modules = None, return_shared_modules = None, verboseLevel = -1):
+def load_model_data(model, file_path, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, modelPrefix = None, writable_tensors = True, preprocess_sd = None, modules = None, return_shared_modules = None, verboseLevel = -1):
     """
     Load a model, detect if it has been previously quantized using quanto and do the extra setup if necessary
     """

@@ -1506,6 +1504,9 @@ def load_model_data(model, file_path, do_quantize = False, quantizationType = qi
         full_state_dict, full_quantization_map, full_tied_weights_map = None, None, None
 
     # deal if we are trying to load just a sub part of a larger model
+    if preprocess_sd != None:
+        state_dict, quantization_map = preprocess_sd(state_dict, quantization_map)
+
     if modelPrefix != None:
         base_model_prefix = modelPrefix + "."
         state_dict = filter_state_dict(state_dict,base_model_prefix)

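For reference, the call site above defines the contract of the new preprocess_sd hook: when supplied, it receives the freshly loaded state_dict and quantization_map and must return both, possibly rewritten, before any modelPrefix filtering or quantization setup runs. Below is a minimal sketch of such a callback; the "transformer." prefix being stripped and the file name in the commented call are hypothetical illustrations, not part of mmgp:

    # Illustrative preprocess_sd callback: rewrite checkpoint keys before mmgp ingests them.
    def strip_transformer_prefix(state_dict, quantization_map):
        def rename(key):
            return key[len("transformer."):] if key.startswith("transformer.") else key
        new_sd = {rename(k): v for k, v in state_dict.items()}
        new_qm = None if quantization_map is None else {rename(k): v for k, v in quantization_map.items()}
        return new_sd, new_qm

    # Hypothetical usage with the keyword added in 3.5.7:
    # model = offload.fast_load_transformers_model("model.safetensors", preprocess_sd=strip_transformer_prefix)
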
@@ -2276,7 +2277,7 @@ class offload:
         setattr(target_module, "forward", functools.update_wrapper(functools.partial(check_empty_cuda_cache, target_module), previous_method) )
 
 
-    def hook_change_module(self, target_module, model, model_id, module_id, previous_method):
+    def hook_change_module(self, target_module, model, model_id, module_id, previous_method, previous_method_name ):
         if hasattr(target_module, "_lock_dtype"):
             dtype = target_module._lock_dtype
         else:

@@ -2289,11 +2290,12 @@ class offload:
             args, kwargs = self.move_args_to_gpu(dtype, *args, **kwargs)
             return previous_method(*args, **kwargs)
 
-        if hasattr(target_module, "
+        if hasattr(target_module, "_mm_" + previous_method_name):
             return
-        setattr(target_module, "
+        setattr(target_module, "_mm_Id", model_id)
+        setattr(target_module, "_mm_" + previous_method_name, previous_method)
 
-        setattr(target_module,
+        setattr(target_module, previous_method_name, functools.update_wrapper(functools.partial(check_change_module, target_module), previous_method) )
 
         if not self.verboseLevel >=1:
             return

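The change above generalises the patch from a hard-coded forward to any method name: the original bound method is stashed on the module under _mm_<method name> (with _mm_Id recording the owning model), and the attribute is then replaced by a functools wrapper around it. A standalone sketch of that save-and-wrap pattern follows, under the assumption that a simple closure stands in for mmgp's check_change_module partial; the helper names are hypothetical:

    import functools
    import torch

    def wrap_method(module: torch.nn.Module, method_name: str, pre_hook):
        saved_attr = "_mm_" + method_name
        if hasattr(module, saved_attr):          # already wrapped, mirrors the guard above
            return
        original = getattr(module, method_name)
        setattr(module, saved_attr, original)    # keep the original so it can be restored

        def wrapper(*args, **kwargs):
            pre_hook(module)                     # e.g. move weights to the GPU first
            return original(*args, **kwargs)

        setattr(module, method_name, functools.update_wrapper(wrapper, original))

    def unwrap_method(module: torch.nn.Module, method_name: str):
        saved_attr = "_mm_" + method_name
        if hasattr(module, saved_attr):
            setattr(module, method_name, getattr(module, saved_attr))
            delattr(module, saved_attr)
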
@@ -2661,23 +2663,27 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
                 cur_blocks_prefix, prev_blocks_name, cur_blocks_seq, is_mod_seq = pre, None, num, False
                 cur_blocks_name = submodule_name
                 # print(f"new block: {model_id}/{cur_blocks_name} - {submodule_name}")
-
-
-            if 
-
-
+            top_submodule = len(submodule_name.split("."))==1
+            offload_hooks = submodule._offload_hooks if hasattr(submodule, "_offload_hooks") else []
+            if len(offload_hooks) > 0:
+                pass
+            assert top_submodule or len(offload_hooks) == 0, "custom offload hooks can only be set at the of the module"
+            submodule_method_names = ["forward"] + offload_hooks
+            for submodule_method_name in submodule_method_names:
+                if not hasattr(submodule, submodule_method_name ): continue
+                if submodule_method_name == "forward" and any_lora and hasattr(submodule,"weight"):
                     submodule_method = self.hook_lora(submodule, current_model, model_id, loras_model_data, loras_model_shortcuts, submodule_name)
                 else:
-                    submodule_method = getattr(submodule, 
-                if callable(submodule_method):
-                    if 
-                        self.hook_change_module(submodule, current_model, model_id, submodule_name, submodule_method)
+                    submodule_method = getattr(submodule, submodule_method_name)
+                if callable(submodule_method):
+                    if top_submodule and cur_blocks_name is None:
+                        self.hook_change_module(submodule, current_model, model_id, submodule_name, submodule_method, submodule_method_name)
                     elif compilationInThisOne and submodule in towers_modules:
                         self.hook_preload_blocks_for_compilation(submodule, model_id, cur_blocks_name, context = submodule_name )
                     else:
                         self.hook_check_empty_cache_needed(submodule, current_model, model_id, cur_blocks_name, submodule_method, context = submodule_name )
-
-
+
+            self.add_module_to_blocks(model_id, cur_blocks_name, submodule, prev_blocks_name, submodule_name)
 
 
         self.tune_preloading(model_id, current_budget, towers_names)

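Read together with the new hook_change_module signature, the added lines let a top-level submodule opt additional entry points into the same wrapping as forward by exposing an _offload_hooks list, and the assert restricts this to un-nested submodules. A hedged sketch of what declaring such hooks might look like; the class, the method names and the commented offload.all call are illustrative assumptions, not mmgp documentation:

    import torch

    class TinyVAE(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.proj = torch.nn.Linear(8, 8)
            # Hypothetical: ask the offloader to also patch encode/decode, not only forward.
            self._offload_hooks = ["encode", "decode"]

        def forward(self, x):
            return self.proj(x)

        def encode(self, x):
            return self.proj(x)

        def decode(self, z):
            return self.proj(z)

    # vae = TinyVAE()
    # offload.all({"vae": vae})   # hooks would be applied while the modules are profiled
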
{mmgp-3.5.6.dist-info → mmgp-3.5.7.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.5.6
+Version: 3.5.7
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 Requires-Python: >=3.10

@@ -15,7 +15,7 @@ Dynamic: license-file
 
 
 <p align="center">
-    <H2>Memory Management 3.5.6 for the GPU Poor by DeepBeepMeep</H2>
+    <H2>Memory Management 3.5.7 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
mmgp-3.5.7.dist-info/RECORD
ADDED

@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=SKt-EunQrH6omBFI7aNLe82GIoXBKW9y1i0HMPFrKLY,127089
+mmgp/safetensors2.py,sha256=4nKV13qCMabnNEB1TA_ueFbfGYYmiQ9racR_C6SsGug,18693
+mmgp-3.5.7.dist-info/licenses/LICENSE.md,sha256=DD-WIS0BkPoWJ_8hQO3J8hMP9K_1-dyrYv1YCbkxcDU,94
+mmgp-3.5.7.dist-info/METADATA,sha256=s420bK-WQuSZM2RpVwYjzXY-QmtIHkRbIiL9hAyV7sA,16309
+mmgp-3.5.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mmgp-3.5.7.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.5.7.dist-info/RECORD,,

mmgp-3.5.6.dist-info/RECORD
DELETED

@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=uoif7gOSNmWg5eqvMkmuVkTErNL6q_QJ0Lmm0QP7FLo,126305
-mmgp/safetensors2.py,sha256=4nKV13qCMabnNEB1TA_ueFbfGYYmiQ9racR_C6SsGug,18693
-mmgp-3.5.6.dist-info/licenses/LICENSE.md,sha256=DD-WIS0BkPoWJ_8hQO3J8hMP9K_1-dyrYv1YCbkxcDU,94
-mmgp-3.5.6.dist-info/METADATA,sha256=hgR8mrkLImQWNkSU3ayt78df5whCozfVqzIUvV9jo1I,16309
-mmgp-3.5.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mmgp-3.5.6.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.5.6.dist-info/RECORD,,

{mmgp-3.5.6.dist-info → mmgp-3.5.7.dist-info}/WHEEL
File without changes

{mmgp-3.5.6.dist-info → mmgp-3.5.7.dist-info}/licenses/LICENSE.md
File without changes

{mmgp-3.5.6.dist-info → mmgp-3.5.7.dist-info}/top_level.txt
File without changes