mmgp 3.1.4.post1592__py3-none-any.whl → 3.1.4.post151926__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mmgp might be problematic.
- mmgp/offload.py +52 -19
- {mmgp-3.1.4.post1592.dist-info → mmgp-3.1.4.post151926.dist-info}/METADATA +1 -1
- mmgp-3.1.4.post151926.dist-info/RECORD +9 -0
- mmgp-3.1.4.post1592.dist-info/RECORD +0 -9
- {mmgp-3.1.4.post1592.dist-info → mmgp-3.1.4.post151926.dist-info}/LICENSE.md +0 -0
- {mmgp-3.1.4.post1592.dist-info → mmgp-3.1.4.post151926.dist-info}/WHEEL +0 -0
- {mmgp-3.1.4.post1592.dist-info → mmgp-3.1.4.post151926.dist-info}/top_level.txt +0 -0
mmgp/offload.py
CHANGED
@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.1.4-
+# ------------------ Memory Management 3.1.4-15926 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -457,7 +457,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.1.4-
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.1.4-15192) by DeepBeepMeep ************{ENDC}{UNBOLD}")
 
 def _extract_num_from_str(num_in_str):
     size = len(num_in_str)
@@ -728,15 +728,20 @@ def _lora_linear_forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor
                 continue
             if self.use_dora[active_adapter]:
                 raise Exception("Dora not yet supported by mmgp")
+
             lora_A = self.lora_A[active_adapter]
             lora_B = self.lora_B[active_adapter]
+            dropout = self.lora_dropout[active_adapter]
             scaling = self.scaling[active_adapter]
             lora_A_weight = lora_A.weight
             lora_B_weight = lora_B.weight
             lora_BA = lora_B_weight @ lora_A_weight
             base_weight += scaling * lora_BA
 
-
+        if self.training:
+            result = torch.nn.functional.linear(dropout(x), base_weight, bias=self.base_layer.bias)
+        else:
+            result = torch.nn.functional.linear(x, base_weight, bias=self.base_layer.bias)
         torch_result_dtype = result.dtype
 
     else:
@@ -754,14 +759,18 @@ def _lora_linear_forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor
             x = x.to(lora_A.weight.dtype)
 
             if not self.use_dora[active_adapter]:
-
+                if self.training:
+                    y = lora_A(dropout(x))
+                else:
+                    y = lora_A(x)
+
                 y = lora_B(y)
                 y*= scaling
                 result+= y
                 del lora_A, lora_B, y
                 # result = result + lora_B(lora_A(dropout(x))) * scaling
             else:
-                if isinstance(dropout, nn.Identity) or not self.training:
+                if isinstance(dropout, torch.nn.Identity) or not self.training:
                     base_result = result
                 else:
                     x = dropout(x)
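In plain terms, the two hunks above make the LoRA linear forward pass dropout-aware: the adapter's dropout module is now fetched per adapter and applied to the input only while the layer is in training mode, both in the merged-weight path and in the per-adapter path. A minimal sketch of that pattern, using plain torch modules (the names base_layer, lora_A, lora_B, scaling and dropout mirror the diff; the standalone function and its signature are illustrative, not mmgp's actual class method):

import torch

def lora_forward_sketch(base_layer: torch.nn.Linear,
                        lora_A: torch.nn.Linear,
                        lora_B: torch.nn.Linear,
                        scaling: float,
                        dropout: torch.nn.Module,
                        x: torch.Tensor,
                        training: bool) -> torch.Tensor:
    # Merged-weight path: fold the low-rank update B @ A into the base weight.
    base_weight = base_layer.weight + scaling * (lora_B.weight @ lora_A.weight)
    # Dropout is applied to the input only during training, as in the diff.
    inp = dropout(x) if training else x
    return torch.nn.functional.linear(inp, base_weight, bias=base_layer.bias)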
@@ -1612,6 +1621,31 @@ class offload:
         if self.verboseLevel >=1:
             print(f"Async loading plan for model '{model_id}' : {(preload_total+base_size)/ONE_MB:0.2f} MB will be preloaded (base size of {base_size/ONE_MB:0.2f} MB + {preload_total/total_size*100:0.1f}% of recurrent layers data) with a {max_blocks_fetch/ONE_MB:0.2f} MB async" + (" circular" if len(towers) == 1 else "") + " shuttle")
 
+    def release(self):
+        global last_offload_obj
+
+        if last_offload_obj == self:
+            last_offload_obj = None
+
+        self.unload_all()
+        self.default_stream = None
+        keys= [k for k in self.blocks_of_modules.keys()]
+        for k in keys:
+            del self.blocks_of_modules[k]
+
+        self.blocks_of_modules = None
+
+
+        for model_id, model in self.models.items():
+            move_loras_to_device(model, "cpu")
+
+        self.models = None
+
+        gc.collect()
+        torch.cuda.empty_cache()
+
+
+
 
     def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = True, extraModelsToQuantize = None, quantizationType = qint8, budgets= 0, workingVRAM = None, asyncTransfers = True, compile = False, perc_reserved_mem_max = 0, coTenantsMap = None, verboseLevel = -1):
         """Hook to a pipeline or a group of modules in order to reduce their VRAM requirements:
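The new release() method gives callers an explicit teardown path: it unloads everything, clears the blocks_of_modules bookkeeping, moves LoRA weights back to CPU, then runs gc.collect() and torch.cuda.empty_cache(). A hedged usage sketch, assuming (as the method's presence on the class suggests) that offload.all(...) returns the offload instance it creates; the pipe object and the parameter values are placeholders, not taken from this diff:

from mmgp import offload

# 'pipe' stands in for a diffusers-style pipeline already built by the caller.
offloadobj = offload.all(pipe, pinnedMemory=True, budgets={"*": 3000})

# ... run inference with the pipeline as usual ...

# New in this version: explicitly free mmgp's state (blocks unloaded,
# LoRAs moved back to CPU, CUDA cache emptied) when the model is retired.
offloadobj.release()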
@@ -1893,12 +1927,11 @@ def profile(pipe_or_dict_of_modules, profile_no: profile_type = profile_type.Ve
     """Apply a configuration profile that depends on your hardware:
     pipe_or_dict_of_modules : the pipeline object or a dictionary of modules of the model
     profile_name : num of the profile:
-        HighRAM_HighVRAM_Fastest (=1):
-        HighRAM_LowVRAM_Fast (=2):
-
-
-
-        VerylowRAM_LowVRAM_Slowest (=5): at least 24 GB of RAM and 10 GB of VRAM : if you don't have much it won't be fast but maybe it will work
+        HighRAM_HighVRAM_Fastest (=1): will try to load entirely a model in VRAM and to keep a copy in reserved RAM for fast loading / unloading
+        HighRAM_LowVRAM_Fast (=2): will try to load only the needed parts of a model in VRAM and to keep a copy in reserved RAM for fast loading / unloading
+        LowRAM_HighVRAM_Medium (=3): will try to load entirely a model in VRAM and to keep a copy in reserved RAM for fast loading / unloading, 8 bits quantization of main model
+        LowRAM_LowVRAM_Slow (=4): will try to load only the needed parts of a model in VRAM and to keep a copy in reserved RAM for fast loading / unloading, 8 bits quantization of main models
+        VerylowRAM_LowVRAM_Slowest (=5): will try to load only the needed parts of a model in VRAM, 8 bits quantization of main models
     overrideKwargs: every parameter accepted by Offload.All can be added here to override the profile choice
     For instance set quantizeTransformer = False to disable transformer quantization which is by default in every profile
     """
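The rewritten docstring also spells out the override mechanism: any keyword accepted by offload.all can be passed to profile() to override what the chosen profile would set. A hedged sketch of such a call, with the import path and the returned object assumed rather than taken from this diff, and pipe again a placeholder:

from mmgp import offload, profile_type

# Profile 4 (LowRAM_LowVRAM) with transformer quantization disabled,
# following the docstring's own quantizeTransformer example.
offloadobj = offload.profile(pipe,
                             profile_no=profile_type.LowRAM_LowVRAM,
                             quantizeTransformer=False)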
@@ -1942,21 +1975,21 @@ def profile(pipe_or_dict_of_modules, profile_no: profile_type = profile_type.Ve
     if profile_no == profile_type.HighRAM_HighVRAM:
         pinnedMemory= True
         budgets = None
-        info = "You have chosen a profile that may require 48 GB of RAM and up to 24 GB of VRAM on some applications."
+        # info = "You have chosen a profile that may require 48 GB of RAM and up to 24 GB of VRAM on some applications."
     elif profile_no == profile_type.HighRAM_LowVRAM:
         pinnedMemory= True
         budgets["*"] = 3000
-        info = "You have chosen a profile that may require 48 GB of RAM and up to 12 GB of VRAM on some applications."
+        # info = "You have chosen a profile that may require 48 GB of RAM and up to 12 GB of VRAM on some applications."
     elif profile_no == profile_type.LowRAM_HighVRAM:
         pinnedMemory= "transformer"
         extraModelsToQuantize = default_extraModelsToQuantize
         budgets = None
-        info = "You have chosen a Medium speed profile that may require 32 GB of RAM and up to 24 GB of VRAM on some applications."
+        # info = "You have chosen a Medium speed profile that may require 32 GB of RAM and up to 24 GB of VRAM on some applications."
     elif profile_no == profile_type.LowRAM_LowVRAM:
         pinnedMemory= "transformer"
         extraModelsToQuantize = default_extraModelsToQuantize
         budgets["*"] = 3000
-        info = "You have chosen a profile that usually may require 32 GB of RAM and up to 12 GB of VRAM on some applications."
+        # info = "You have chosen a profile that usually may require 32 GB of RAM and up to 12 GB of VRAM on some applications."
     elif profile_no == profile_type.VerylowRAM_LowVRAM:
         pinnedMemory= False
         extraModelsToQuantize = default_extraModelsToQuantize
@@ -1964,11 +1997,11 @@ def profile(pipe_or_dict_of_modules, profile_no: profile_type = profile_type.Ve
         if "transformer" in modules:
             budgets["transformer"] = 400
         #asyncTransfers = False
-        info = "You have chosen the slowest profile that may require 24 GB of RAM and up to 10 GB of VRAM on some applications."
+        # info = "You have chosen the slowest profile that may require 24 GB of RAM and up to 10 GB of VRAM on some applications."
     else:
         raise Exception("Unknown profile")
-    info += " Actual requirements may varry depending on the application or on the tuning done to the profile."
-
+    # info += " Actual requirements may varry depending on the application or on the tuning done to the profile."
+    info =""
     if budgets != None and len(budgets) == 0:
         budgets = None
 
@@ -1976,7 +2009,7 @@ def profile(pipe_or_dict_of_modules, profile_no: profile_type = profile_type.Ve
     kwargs = { "pinnedMemory": pinnedMemory, "extraModelsToQuantize" : extraModelsToQuantize, "budgets": budgets, "asyncTransfers" : asyncTransfers, "quantizeTransformer": quantizeTransformer }
 
     if verboseLevel>=2:
-        info = info
+        info = info + f"Profile '{profile_type.tostr(profile_no)}' sets the following options:" #CrLf
         for k,v in kwargs.items():
             if k in overrideKwargs:
                 info = info + CrLf + f"- '{k}': '{kwargs[k]}' overriden with value '{overrideKwargs[k]}'"
mmgp-3.1.4.post151926.dist-info/RECORD
ADDED
@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=gllpz2NNwJL_ESk-L1fMU96inOwh3WB4mu4NitGKyHI,87180
+mmgp/safetensors2.py,sha256=DCdlRH3769CTyraAmWAB3b0XrVua7z6ygQ-OyKgJN6A,16453
+mmgp-3.1.4.post151926.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.1.4.post151926.dist-info/METADATA,sha256=JHxiA5Y6w4OwT2LV8mZ-biMZ5Sk3UA87fAdD7O2cCko,15950
+mmgp-3.1.4.post151926.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+mmgp-3.1.4.post151926.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.1.4.post151926.dist-info/RECORD,,
mmgp-3.1.4.post1592.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=dfmplgTm19DPJ8AKqOf8McaY2f63cz3Dqim_-Hvpcqo,86202
-mmgp/safetensors2.py,sha256=DCdlRH3769CTyraAmWAB3b0XrVua7z6ygQ-OyKgJN6A,16453
-mmgp-3.1.4.post1592.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-3.1.4.post1592.dist-info/METADATA,sha256=4PHdTr9MliaSu4UO5ET8GSD9sJRKqLyL8PMcbCcnhtQ,15948
-mmgp-3.1.4.post1592.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-mmgp-3.1.4.post1592.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.1.4.post1592.dist-info/RECORD,,
{mmgp-3.1.4.post1592.dist-info → mmgp-3.1.4.post151926.dist-info}/LICENSE.md
File without changes

{mmgp-3.1.4.post1592.dist-info → mmgp-3.1.4.post151926.dist-info}/WHEEL
File without changes

{mmgp-3.1.4.post1592.dist-info → mmgp-3.1.4.post151926.dist-info}/top_level.txt
File without changes