mmgp 3.0.1__py3-none-any.whl → 3.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mmgp/offload.py +20 -16
- {mmgp-3.0.1.dist-info → mmgp-3.0.3.dist-info}/METADATA +6 -2
- mmgp-3.0.3.dist-info/RECORD +9 -0
- {mmgp-3.0.1.dist-info → mmgp-3.0.3.dist-info}/WHEEL +1 -1
- mmgp-3.0.1.dist-info/RECORD +0 -9
- {mmgp-3.0.1.dist-info → mmgp-3.0.3.dist-info}/LICENSE.md +0 -0
- {mmgp-3.0.1.dist-info → mmgp-3.0.3.dist-info}/top_level.txt +0 -0
mmgp/offload.py
CHANGED
@@ -260,6 +260,7 @@ def _pin_to_memory(model, model_id, partialPinning = False, perc_reserved_mem_max
             if include:
                 params_list = params_list + list(sub_module.buffers(recurse=False)) + list(sub_module.parameters(recurse=False))
 
+    # print(f"num params to pin {model_id}: {len(params_list)}")
    for p in params_list:
        if isinstance(p, QTensor):
            length = torch.numel(p._data) * p._data.element_size() + torch.numel(p._scale) * p._scale.element_size()
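For context on the size computation this hunk sits in: a quantized parameter is not a flat tensor, so its pinned size is the sum of its payload and scale tensors. A minimal sketch of that bookkeeping, duck-typing the QTensor check so the snippet stays self-contained (the helper name is illustrative):

```python
import torch

def tensor_nbytes(p) -> int:
    # Quanto-style quantized tensors keep their integer payload in `_data`
    # and their scales in `_scale`; both contribute to the pinned size,
    # exactly as in the `length = ...` line above.
    if hasattr(p, "_data") and hasattr(p, "_scale"):
        return (torch.numel(p._data) * p._data.element_size()
                + torch.numel(p._scale) * p._scale.element_size())
    # Plain tensors: element count times element width.
    return torch.numel(p) * p.element_size()
```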
@@ -605,18 +606,18 @@ def _quantize(model_to_quantize, weights=qint8, verboseLevel = 1, threshold = 10
 
     # force read non quantized parameters so that their lazy tensors and corresponding mmap are released
     # otherwise we may end up to keep in memory both the quantized and the non quantize model
-
-
-    for name, m in model_to_quantize.named_modules():
+    for m in model_to_quantize.modules():
         # do not read quantized weights (detected them directly or behind an adapter)
-        if isinstance(m, QModuleMixin) or hasattr(m, "base_layer") and isinstance(m.base_layer, QModuleMixin):
-
+        if isinstance(m, QModuleMixin) or hasattr(m, "base_layer") and isinstance(m.base_layer, QModuleMixin):
+            if hasattr(m, "bias") and m.bias is not None:
+                m.bias.data = m.bias.data + 0
         else:
-
-
+            for n, p in m.named_parameters(recurse = False):
+                data = getattr(m, n)
+                setattr(m,n, torch.nn.Parameter(data + 0 ) )
 
-
-
+        for b in m.buffers(recurse = False):
+            b.data = b.data + 0
 
 
     freeze(model_to_quantize)
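The comment in this hunk states the intent: weights loaded through mmap-backed safetensors are lazy, and both the quantized and the original tensors stay resident until the originals are actually read. Adding 0 forces a fresh, materialized copy that no longer references the file mapping. A condensed sketch of the per-module pattern the new code applies (the function name is illustrative):

```python
import torch

def materialize_module_(m: torch.nn.Module) -> None:
    # `t + 0` allocates new storage and copies the data, so a tensor that was
    # lazily mapped from a safetensors file stops referencing the mmap and
    # the mapping can be released.
    for n, _ in m.named_parameters(recurse=False):
        setattr(m, n, torch.nn.Parameter(getattr(m, n) + 0))
    for b in m.buffers(recurse=False):
        b.data = b.data + 0
```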
@@ -974,6 +975,8 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat
     quick version of .LoadfromPretrained of the transformers library
     used to build a model and load the corresponding weights (quantized or not)
     """
+
+
     import os.path
     from accelerate import init_empty_weights
 
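Per its docstring, `fast_load_transformers_model` is a quick stand-in for the transformers `from_pretrained` path. A hedged usage sketch; only `model_path` and `do_quantize` are visible in the (truncated) signature above, and the checkpoint path is hypothetical:

```python
from mmgp import offload

# Hypothetical checkpoint path; `do_quantize` appears in the signature above.
model = offload.fast_load_transformers_model(
    "ckpts/text_encoder.safetensors",
    do_quantize=True,
)
```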
@@ -1313,6 +1316,13 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = True
                 submodel.forward= torch.compile(submodel.forward, backend= "inductor", mode="default" ) # , fullgraph= True, mode= "reduce-overhead", "max-autotune", "max-autotune-no-cudagraphs",
                 #dynamic=True,
 
+        if pinAllModels or model_id in modelsToPin:
+            if hasattr(current_model,"_already_pinned"):
+                if self.verboseLevel >=1:
+                    print(f"Model '{model_id}' already pinned to reserved memory")
+            else:
+                _pin_to_memory(current_model, model_id, partialPinning= partialPinning, perc_reserved_mem_max=perc_reserved_mem_max, verboseLevel=verboseLevel)
+
         for submodule_name, submodule in current_model.named_modules():
             # create a fake 'accelerate' parameter so that the _execution_device property returns always "cuda"
             # (it is queried in many pipelines even if offloading is not properly implemented)
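This hunk moves the pinning step so it runs before the submodule scan; the matching removal is in the next hunk. `_pin_to_memory` tags the model with `_already_pinned`, which is what makes the repeated-call guard above work. A simplified sketch of that guard, using plain `Tensor.pin_memory()` rather than mmgp's reserved-memory logic:

```python
import torch

def pin_model_once(model: torch.nn.Module) -> None:
    if getattr(model, "_already_pinned", False):
        return  # a second call is a no-op, mirroring the guard in the diff
    for p in model.parameters():
        p.data = p.data.pin_memory()  # page-locked copy enables fast DMA
    model._already_pinned = True      # same marker attribute as above
```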
@@ -1358,12 +1368,6 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = True
 
             current_size = self.add_module_to_blocks(model_id, cur_blocks_name, submodule, prev_blocks_name)
 
-        if pinAllModels or model_id in modelsToPin:
-            if hasattr(current_model,"_already_pinned"):
-                if self.verboseLevel >=1:
-                    print(f"Model '{model_id}' already pinned to reserved memory")
-            else:
-                _pin_to_memory(current_model, model_id, partialPinning= partialPinning, perc_reserved_mem_max=perc_reserved_mem_max, verboseLevel=verboseLevel)
 
 
 
@@ -1422,6 +1426,7 @@ def profile(pipe_or_dict_of_modules, profile_no: profile_type = profile_type.VerylowRAM_LowVRAM
 
     default_budgets = { "transformer" : 600 , "text_encoder": 3000, "text_encoder_2": 3000 }
     extraModelsToQuantize = None
+    asyncTransfers = True
 
     if profile_no == profile_type.HighRAM_HighVRAM:
         pinnedMemory= True
@@ -1439,7 +1444,6 @@ def profile(pipe_or_dict_of_modules, profile_no: profile_type = profile_type.VerylowRAM_LowVRAM
         pinnedMemory= "transformer"
         extraModelsToQuantize = default_extraModelsToQuantize
         budgets=default_budgets
-        asyncTransfers = True
         info = "You have chosen a profile that requires at least 32 GB of RAM and 12 GB of VRAM. Some RAM is consumed to reduce VRAM consumption. "
     elif profile_no == profile_type.VerylowRAM_LowVRAM:
         pinnedMemory= False
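Taken together, the last two hunks promote `asyncTransfers` from a profile-specific setting to the default for every profile. Asynchronous transfers are where pinned memory pays off: a `non_blocking` copy from page-locked host memory can overlap with GPU compute. A minimal sketch of the mechanism, independent of mmgp:

```python
import torch

stream = torch.cuda.Stream()                      # side stream for uploads
host = torch.empty(1024, 1024, pin_memory=True)   # page-locked staging buffer
dev = torch.empty(1024, 1024, device="cuda")
with torch.cuda.stream(stream):
    dev.copy_(host, non_blocking=True)            # returns without waiting
torch.cuda.current_stream().wait_stream(stream)   # sync before reading `dev`
```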
{mmgp-3.0.1.dist-info → mmgp-3.0.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mmgp
-Version: 3.0.1
+Version: 3.0.3
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
@@ -69,7 +69,8 @@ You can choose between 5 profiles depending on your hardware:
 - VerylowRAM_LowVRAM (5): at least 24 GB of RAM and 10 GB of VRAM : if you don't have much it won't be fast but maybe it will work
 
 Profile 2 (High RAM) and 4 (Low RAM) are the most recommended profiles since they are versatile (support for long videos for a slight performance cost).\
-
+If you use a Flux-derived application, profiles 1 and 3 will offer much faster generation times.
+In any case, a safe approach is to start from profile 5 (the default profile) and then go down progressively to profile 4 and then to profile 2, as long as the app remains responsive and doesn't trigger any out-of-memory error.
 
 By default the 'transformer' will be quantized to 8 bits for all profiles. If you don't want that you may specify the optional parameter *quantizeTransformer = False*.
 
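Following the README's walk-down advice, profile selection is a single call to `offload.profile`, whose signature appears in the offload.py hunks above. A sketch; the two-module dict stands in for a real pipeline, and only enum members shown elsewhere in this diff are used:

```python
import torch
from mmgp import offload, profile_type

# profile() accepts a pipe or a dict of modules; these tiny modules are
# placeholders for a real transformer / text encoder.
modules = {"transformer": torch.nn.Linear(8, 8),
           "text_encoder": torch.nn.Linear(8, 8)}
offload.profile(modules, profile_type.VerylowRAM_LowVRAM)  # profile 5, the default
```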
@@ -80,6 +81,9 @@ Every parameter set automatically by a profile can be overridden with one or mul
 ```
 If you want to know which parameters are set by one specific profile you can use the parameter *verboseLevel=2*
 
+**It is highly recommended to put the *from mmgp import offload, profile_type* at the top of your main Python file (that is, as the first import) so that all the existing safetensors calls are redirected to mmgp.**
+
+
 ## Alternatively you may want to create your own profile with specific parameters:
 
 For example:
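The new bolded note is about import order: mmgp redirects safetensors loading (see mmgp/safetensors2.py in the RECORD below), so it must be imported before any library that binds those calls first. A sketch of an entry script honoring that ordering; the diffusers import is an illustrative downstream consumer:

```python
# First import of the entry script, before torch/transformers/diffusers,
# so mmgp can hook the safetensors loading calls (per the README note above).
from mmgp import offload, profile_type

import torch
from diffusers import DiffusionPipeline  # illustrative downstream import
```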
mmgp-3.0.3.dist-info/RECORD
ADDED
@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=N_n12QJmZlPRbZiYl6BQVfmJaqxxIbiCKkT6w-2CVo4,61781
+mmgp/safetensors2.py,sha256=CSv8HdrjURUzBazpaBDU1WNwUL1lhzpCyzG0GWygbGE,13602
+mmgp-3.0.3.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.0.3.dist-info/METADATA,sha256=0dw13_XUzNPCV6VL-e5FAjvMIUDDT1ffFf7rLG_34zc,12079
+mmgp-3.0.3.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
+mmgp-3.0.3.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.0.3.dist-info/RECORD,,
mmgp-3.0.1.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=T9RBAibAyAnKV-8AiYmop_UOGl_N1l5EJo5ucCZfxK8,61611
-mmgp/safetensors2.py,sha256=CSv8HdrjURUzBazpaBDU1WNwUL1lhzpCyzG0GWygbGE,13602
-mmgp-3.0.1.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-3.0.1.dist-info/METADATA,sha256=uSsBc5pBaYBL4Ek3TR99J9hP7AQQlwnnUM_JQlkNwbE,11765
-mmgp-3.0.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-mmgp-3.0.1.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.0.1.dist-info/RECORD,,
{mmgp-3.0.1.dist-info → mmgp-3.0.3.dist-info}/LICENSE.md
File without changes

{mmgp-3.0.1.dist-info → mmgp-3.0.3.dist-info}/top_level.txt
File without changes