mmgp-3.2.5-py3-none-any.whl → mmgp-3.2.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mmgp might be problematic.
- mmgp/offload.py +60 -11
- {mmgp-3.2.5.dist-info → mmgp-3.2.6.dist-info}/METADATA +2 -2
- mmgp-3.2.6.dist-info/RECORD +9 -0
- {mmgp-3.2.5.dist-info → mmgp-3.2.6.dist-info}/WHEEL +1 -1
- mmgp-3.2.5.dist-info/RECORD +0 -9
- {mmgp-3.2.5.dist-info → mmgp-3.2.6.dist-info}/LICENSE.md +0 -0
- {mmgp-3.2.5.dist-info → mmgp-3.2.6.dist-info}/top_level.txt +0 -0
mmgp/offload.py
CHANGED
@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.2.5 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.2.6 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -61,7 +61,12 @@ import sys
 import os
 import json
 import psutil
-
+from accelerate import init_empty_weights
+
+try:
+
+    from peft.tuners.tuners_utils import BaseTuner
+
     from diffusers.utils.peft_utils import set_weights_and_activate_adapters, get_peft_kwargs
 except:
     set_weights_and_activate_adapters = None
@@ -297,12 +302,13 @@ def _get_tensor_ref(p):
     return p.data_ptr()


-def _pin_to_memory(model, model_id, partialPinning = False, verboseLevel = 1):
+def _pin_to_memory(model, model_id, partialPinning = False, pinnedLora = True, verboseLevel = 1):
     if partialPinning:
         towers_names, _ = _detect_main_towers(model)


-    BIG_TENSOR_MAX_SIZE = 2**28 # 256 MB
+    # BIG_TENSOR_MAX_SIZE = 2**28 # 256 MB
+    BIG_TENSOR_MAX_SIZE = 2**27 # 128 MB
     current_big_tensor_size = 0
     big_tensor_no = 0
     big_tensors_sizes = []
@@ -314,6 +320,9 @@ def _pin_to_memory(model, model_id, partialPinning = False, verboseLevel = 1):
         include = True
         if partialPinning:
             include = any(k.startswith(pre) for pre in towers_names) if partialPinning else True
+        if include and not pinnedLora and ".lora_" in k:
+            include = False
+
         if include:
             params_dict.update( { k + '.' + n : (p, False) for n, p in sub_module.named_parameters(recurse=False) } )
             params_dict.update( { k + '.' + n : (b, True) for n, b in sub_module.named_buffers(recurse=False) } )
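Together, the two hunks above halve the pinned-buffer chunk size (256 MB to 128 MB) and thread a new pinnedLora flag through _pin_to_memory so LoRA adapter tensors can be kept out of page-locked RAM. A minimal standalone sketch of the new filter logic, with an illustrative helper name and sample keys (should_pin is not part of mmgp):

# Hypothetical helper isolating the new pinning filter; the function name
# and the sample keys below are illustrative, not taken from mmgp.
def should_pin(name, partial_pinning, towers_names, pinned_lora):
    include = any(name.startswith(pre) for pre in towers_names) if partial_pinning else True
    # With pinnedLora False, tensors whose key contains ".lora_" are skipped,
    # so small, frequently swapped adapter weights don't consume pinned RAM.
    if include and not pinned_lora and ".lora_" in name:
        include = False
    return include

print(should_pin("blocks.0.attn.lora_A.weight", False, [], pinned_lora=False))  # False
print(should_pin("blocks.0.attn.to_q.weight", False, [], pinned_lora=False))    # True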
@@ -479,7 +488,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.5) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.6) by DeepBeepMeep ************{ENDC}{UNBOLD}")

 def _extract_num_from_str(num_in_str):
     size = len(num_in_str)
@@ -800,7 +809,7 @@ def _lora_linear_forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
             scaling = get_scaling(active_adapter)
             lora_A_weight = lora_A.weight
             lora_B_weight = lora_B.weight
-            if new_weights:
+            if new_weights or True:
                 base_weight = torch.addmm(base_weight, lora_B_weight, lora_A_weight, alpha= scaling )
                 # base_weight = base_weight + scaling * lora_B_weight @ lora_A_weight
             else:
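Two things change here: the condition becomes `if new_weights or True:`, which makes this branch unconditional (the else path below is now dead code), and the branch folds the LoRA delta into the base weight with a single torch.addmm call. A small sketch, independent of mmgp, checking the fused call against the commented-out naive form (shapes are arbitrary):

import torch

out_f, in_f, rank, scaling = 8, 16, 4, 0.5
W = torch.randn(out_f, in_f)  # base_weight
B = torch.randn(out_f, rank)  # lora_B.weight
A = torch.randn(rank, in_f)   # lora_A.weight

# torch.addmm(input, mat1, mat2, alpha=s) returns input + s * (mat1 @ mat2)
fused = torch.addmm(W, B, A, alpha=scaling)
naive = W + scaling * (B @ A)
print(torch.allclose(fused, naive, atol=1e-6))  # True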
@@ -857,7 +866,47 @@ def _lora_linear_forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:

     result = result.to(torch_result_dtype)
     return result
-
+
+def _inject_adapter(
+    self, model: torch.nn.Module, adapter_name: str, autocast_adapter_dtype: bool = True, low_cpu_mem_usage: bool = False
+) -> None:
+
+    def _get_submodules(model, key):
+        parent = model.get_submodule(".".join(key.split(".")[:-1]))
+        target_name = key.split(".")[-1]
+        target = model.get_submodule(key)
+        return parent, target, target_name
+
+    peft_config = self.peft_config[adapter_name]
+    self._check_new_adapter_config(peft_config)
+
+    model_config = self.get_model_config(model)
+
+    peft_config = self._prepare_adapter_config(peft_config, model_config)
+
+    self._prepare_model(peft_config, model)
+
+    target_modules = peft_config.target_modules.copy()
+
+    # unexpected_modules = []
+    for key, target in model.named_modules():
+        if not key:
+            continue
+        if key in target_modules:
+            target_modules.remove(key)
+            self.targeted_module_names.append(key)
+            # pos = key.rfind(".")
+            # parent = key[:pos]
+            # target_name = key[pos+1:]
+            parent, target, target_name = _get_submodules(model, key)
+            with init_empty_weights():
+                self._create_and_replace(peft_config, adapter_name, target, target_name, parent, current_key=key)
+
+    self.set_adapter(self.active_adapters)
+    self._mark_only_adapters_as_trainable(model)
+
+    return target_modules
+
 def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_loras = True, split_linear_modules_map = None, preprocess_sd = None, verboseLevel = -1,):
     verboseLevel = _compute_verbose_level(verboseLevel)

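The new _inject_adapter reimplements a simplified version of peft's BaseTuner.inject_adapter, with one notable twist: _create_and_replace runs inside accelerate's init_empty_weights context, so the replacement LoRA modules are created on the meta device and no real memory is allocated until their weights are actually loaded. A short sketch of what that context manager does (the layer size is arbitrary):

import torch
from accelerate import init_empty_weights

with init_empty_weights():
    layer = torch.nn.Linear(4096, 4096)

print(layer.weight.device)  # meta: parameters exist as shapes only
print(layer.weight.shape)   # torch.Size([4096, 4096])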
@@ -866,6 +915,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_loras = True, split_linear_modules_map = None, preprocess_sd = None, verboseLevel = -1,):

     from peft.tuners.lora import Linear
     Linear.forward = _lora_linear_forward
+    BaseTuner.inject_adapter = _inject_adapter

     if not isinstance(lora_path, list):
         lora_path = [lora_path]
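Both overrides are installed by monkey-patching the peft classes: rebinding Linear.forward and BaseTuner.inject_adapter on the class itself makes every later call, from any instance, go through the mmgp versions. The pattern reduced to a toy example (class and names are illustrative):

class Greeter:
    def hello(self):
        return "hello"

def _patched_hello(self):
    return "patched hello"

# Rebinding the method on the class affects all instances, including
# ones created afterwards, the same mechanism as the peft hooks above.
Greeter.hello = _patched_hello
print(Greeter().hello())  # patched hello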
@@ -979,7 +1029,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_loras = True, split_linear_modules_map = None, preprocess_sd = None, verboseLevel = -1,):
         unexpected_keys = getattr(incompatible_keys, "unexpected_keys", None)
         if unexpected_keys:
             raise Exception(f"Lora '{path}' contains invalid keys '{unexpected_keys}'")
-
+
         if verboseLevel >=1:
             print(f"Lora '{path}' was loaded in model '{_get_module_name(model)}'")
     if activate_all_loras:
@@ -1025,7 +1075,6 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat


     import os.path
-    from accelerate import init_empty_weights

     if not (model_path.endswith(".sft") or model_path.endswith(".safetensors")):
         raise Exception("full model path to file expected")
@@ -1811,7 +1860,7 @@ class offload:



-    def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = True, extraModelsToQuantize = None, quantizationType = qint8, budgets= 0, workingVRAM = None, asyncTransfers = True, compile = False, perc_reserved_mem_max = 0, coTenantsMap = None, verboseLevel = -1):
+    def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedLora = False, quantizeTransformer = True, extraModelsToQuantize = None, quantizationType = qint8, budgets= 0, workingVRAM = None, asyncTransfers = True, compile = False, perc_reserved_mem_max = 0, coTenantsMap = None, verboseLevel = -1):
         """Hook to a pipeline or a group of modules in order to reduce their VRAM requirements:
         pipe_or_dict_of_modules : the pipeline object or a dictionary of modules of the model
         quantizeTransformer: set True by default will quantize on the fly the video / image model
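offload.all gains a pinnedLora argument (default False), which is forwarded to _pin_to_memory in the hunk below. A hypothetical call, assuming a diffusers-style pipeline object as in the mmgp README (pipe is a placeholder):

from mmgp import offload

offload.all(
    pipe,                    # pipeline object or dict of modules (placeholder)
    pinnedMemory=True,       # pin model weights into page-locked RAM
    pinnedLora=False,        # new in 3.2.6: leave LoRA tensors unpinned
    quantizeTransformer=True,
)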
@@ -2010,7 +2059,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = True, extraModelsToQuantize = None, quantizationType = qint8, budgets= 0, workingVRAM = None, asyncTransfers = True, compile = False, perc_reserved_mem_max = 0, coTenantsMap = None, verboseLevel = -1):
                 if self.verboseLevel >=1:
                     print(f"Model '{model_id}' already pinned to reserved memory")
                 else:
-                    _pin_to_memory(current_model, model_id, partialPinning= partialPinning, verboseLevel=verboseLevel)
+                    _pin_to_memory(current_model, model_id, partialPinning= partialPinning, pinnedLora = pinnedLora, verboseLevel=verboseLevel)

             current_budget = model_budgets[model_id]
             cur_blocks_prefix, prev_blocks_name, cur_blocks_name,cur_blocks_seq, is_mod_seq = None, None, None, -1, False
{mmgp-3.2.5.dist-info → mmgp-3.2.6.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mmgp
-Version: 3.2.5
+Version: 3.2.6
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
@@ -17,7 +17,7 @@ Requires-Dist: peft


 <p align="center">
-<H2>Memory Management 3.2.5 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.2.6 for the GPU Poor by DeepBeepMeep</H2>
 </p>

mmgp-3.2.6.dist-info/RECORD
ADDED

@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=sN95BJAvdWOu36AWwJlACdxMDiOzeqL2HXLN90oaec4,98169
+mmgp/safetensors2.py,sha256=DCdlRH3769CTyraAmWAB3b0XrVua7z6ygQ-OyKgJN6A,16453
+mmgp-3.2.6.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.2.6.dist-info/METADATA,sha256=F7LmNAvBTLEEfFT-Wbh7md4s1U4Vdnt4RrBfuBXpH_s,16151
+mmgp-3.2.6.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
+mmgp-3.2.6.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.2.6.dist-info/RECORD,,
mmgp-3.2.5.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=XQOTMMp5UQku3byZwDr_dYgD3tK4DNTZkwotVyPg-Lk,96434
-mmgp/safetensors2.py,sha256=DCdlRH3769CTyraAmWAB3b0XrVua7z6ygQ-OyKgJN6A,16453
-mmgp-3.2.5.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-3.2.5.dist-info/METADATA,sha256=s6c1X2ar9DQH1CiLAHdO5X60fuNfKqfmqu-xL_W6j5s,16151
-mmgp-3.2.5.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
-mmgp-3.2.5.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.2.5.dist-info/RECORD,,
{mmgp-3.2.5.dist-info → mmgp-3.2.6.dist-info}/LICENSE.md
File without changes

{mmgp-3.2.5.dist-info → mmgp-3.2.6.dist-info}/top_level.txt
File without changes