mmgp 3.1.1__py3-none-any.whl → 3.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of mmgp might be problematic.
- mmgp/offload.py +62 -24
- {mmgp-3.1.1.dist-info → mmgp-3.1.3.dist-info}/METADATA +1 -1
- mmgp-3.1.3.dist-info/RECORD +9 -0
- mmgp-3.1.1.dist-info/RECORD +0 -9
- {mmgp-3.1.1.dist-info → mmgp-3.1.3.dist-info}/LICENSE.md +0 -0
- {mmgp-3.1.1.dist-info → mmgp-3.1.3.dist-info}/WHEEL +0 -0
- {mmgp-3.1.1.dist-info → mmgp-3.1.3.dist-info}/top_level.txt +0 -0
mmgp/offload.py
CHANGED
@@ -576,7 +576,7 @@ def _quantize(model_to_quantize, weights=qint8, verboseLevel = 1, threshold = 10
     if hasattr(model_to_quantize, "_quanto_map"):
         for k, entry in model_to_quantize._quanto_map.items():
             weights = entry["weights"]
-            print(f"Model '{model_id}' is already quantized
+            print(f"Model '{model_id}' is already quantized to format '{weights}'")
             return False
         print(f"Model '{model_id}' is already quantized")
         return False
@@ -680,7 +680,7 @@ def _quantize(model_to_quantize, weights=qint8, verboseLevel = 1, threshold = 10
 
     return True
 
-def load_loras_into_model(model, lora_path, lora_multi = None, verboseLevel = -1):
+def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_loras = True, verboseLevel = -1,):
     verboseLevel = _compute_verbose_level(verboseLevel)
 
     if inject_adapter_in_model == None or set_weights_and_activate_adapters == None or get_peft_kwargs == None:
@@ -731,9 +731,6 @@ def load_loras_into_model(model, lora_path, lora_multi = None, verboseLevel = -1
 
     # is_correct_format = all("lora" in key for key in state_dict.keys())
 
-
-
-
     # check with first key if is not in peft format
     # first_key = next(iter(state_dict.keys()))
     # if "lora_A" not in first_key:
@@ -770,7 +767,17 @@ def load_loras_into_model(model, lora_path, lora_multi = None, verboseLevel = -1
         pass
     if verboseLevel >=1:
         print(f"Lora '{path}' was loaded in model '{_get_module_name(model)}'")
-
+    if activate_all_loras:
+        set_weights_and_activate_adapters(model,[ str(i) for i in range(len(lora_multi))], lora_multi)
+
+def activate_loras(model, lora_nos, lora_multi = None ):
+    if not isinstance(lora_nos, list):
+        lora_nos = [lora_nos]
+    lora_nos = [str(l) for l in lora_nos]
+    if lora_multi is None:
+        lora_multi = [1. for _ in lora_nos]
+    set_weights_and_activate_adapters(model, lora_nos, lora_multi)
+
 
 def move_loras_to_device(model, device="cpu" ):
     if hasattr( model, "_lora_loadable_modules"):
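
Taken together with the signature change above, 3.1.3 decouples LoRA loading from activation: load_loras_into_model still activates every adapter by default, but with activate_all_loras=False activation can be deferred and performed selectively through the new activate_loras, which accepts a single index or a list and defaults each multiplier to 1.0. A minimal usage sketch, assuming a model already managed by mmgp and hypothetical LoRA file names (the loop over path above suggests lora_path may be a list):

    from mmgp import offload

    # Load two loras but defer activation (new in 3.1.3)
    offload.load_loras_into_model(model, ["style_a.safetensors", "style_b.safetensors"],
                                  activate_all_loras=False)

    # Later: enable only adapter "1" at half strength
    offload.activate_loras(model, 1, lora_multi=[0.5])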
@@ -979,14 +986,13 @@ class offload:
         self.blocks_of_modules[entry_name] = blocks_params
         blocks_params_size = 0
         if blocks_name !=None:
-
             prev_entry_name = None if prev_block_name == None else model_id + "/" + prev_block_name
             self.prev_blocks_names[entry_name] = prev_entry_name
             if not prev_block_name == None:
                 self.next_blocks_names[prev_entry_name] = entry_name
 
-
         for k,p in submodule.named_parameters(recurse=False):
+
             if isinstance(p, QTensor):
                 blocks_params.append( (submodule, k, p, False ) )
 
@@ -1268,7 +1274,7 @@ class offload:
 
         if module_id == None or module_id =='':
             model_name = model._get_name()
-            print(f"Hooked
+            print(f"Hooked to model '{model_id}' ({model_name})")
 
 
 def save_model(model, file_path, do_quantize = False, quantizationType = qint8, verboseLevel = -1, config_file_path = None ):
@@ -1311,7 +1317,29 @@ def save_model(model, file_path, do_quantize = False, quantizationType = qint8,
     print(f"File '{file_path}' saved")
 
 
+def extract_models(prefix, obj):
+    pipe = {}
+    for name in dir(obj):
+        element = getattr(obj,name)
+        if name in ("pipeline", "pipe"):
+            pipeline = element
+            if hasattr(pipeline , "components") and isinstance(pipeline.components, dict):
+                for k, model in pipeline.components.items():
+                    if model != None:
+                        pipe[prefix + "/" + k ] = model
+        elif isinstance(element, torch.nn.Module):
+            if prefix + "/" + name in pipe:
+                pipe[prefix + "/_" + name ] = element
+            else:
+                pipe[prefix + "/" + name ] = element
+        elif isinstance(element, dict):
+            for k, element in element.items():
+                if hasattr(element , "pipeline"):
+                    pipe.update( extract_models(prefix + "/" + k,element ))
+
 
+    return pipe
+
 
 def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = True, extraModelsToQuantize = None, quantizationType = qint8, budgets= 0, asyncTransfers = True, compile = False, perc_reserved_mem_max = 0, verboseLevel = -1):
     """Hook to a pipeline or a group of modules in order to reduce their VRAM requirements:
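
The new extract_models helper flattens an arbitrary wrapper object into the {name: module} dict that offload.all expects: it scans dir(obj), pulls the components out of a pipeline/pipe attribute, collects bare torch.nn.Module attributes (prefixing a colliding name with an underscore), and recurses into dicts whose values carry a pipeline. A sketch of the intended use, with a hypothetical wrapper class and attribute names:

    import torch
    from mmgp import offload

    class Wrapper:
        def __init__(self, pipe, upscaler):
            self.pipe = pipe          # diffusers-style pipeline exposing .components
            self.upscaler = upscaler  # a bare torch.nn.Module

    models = offload.extract_models("main", Wrapper(pipe, upscaler))
    # -> e.g. {"main/transformer": ..., "main/text_encoder": ..., "main/upscaler": ...}
    offload.all(models)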
@@ -1337,6 +1365,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
     if not budgets is None:
         if isinstance(budgets , dict):
             model_budgets = budgets
+            budget = budgets.get("*", 0) * ONE_MB
         else:
             budget = int(budgets) * ONE_MB
 
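
This one-line addition gives dict-valued budgets a "*" wildcard: the wildcard entry (in MB) seeds the default budget for models without an entry of their own, while named keys still apply per model. An illustrative call (the figures are placeholders, not recommendations):

    offload.all(pipe,
                budgets={"*": 3000,            # ~3 GB default per model
                         "transformer": 400})  # tighter cap for the transformer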
@@ -1451,7 +1480,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
         if model_budget > 0 and model_budget > current_model_size:
             model_budget = 0
 
-        model_budgets[model_id] = model_budget
+        model_budgets[model_id] = model_budget #/ 2 if asyncTransfers else model_budget
 
     partialPinning = False
 
@@ -1495,10 +1524,11 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
         if not hasattr(submodule, "_hf_hook"):
             setattr(submodule, "_hf_hook", HfHook())
 
-        if submodule_name=='':
-
-
-
+        # if submodule_name=='':
+        #     continue
+
+
+        if current_budget > 0 and len(submodule_name) > 0:
             if cur_blocks_prefix != None:
                 if submodule_name.startswith(cur_blocks_prefix):
                     depth_prefix = cur_blocks_prefix.split(".")
@@ -1508,7 +1538,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
                     if num != cur_blocks_seq and (cur_blocks_seq == -1 or current_size > current_budget):
                         prev_blocks_name = cur_blocks_name
                         cur_blocks_name = cur_blocks_prefix + str(num)
-
+                        print(f"new block: {model_id}/{cur_blocks_name} - {submodule_name}")
                         cur_blocks_seq = num
                 else:
                     cur_blocks_prefix, prev_blocks_name, cur_blocks_name,cur_blocks_seq = None, None, None, -1
@@ -1520,7 +1550,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
                 elif num >=0:
                     cur_blocks_prefix, prev_blocks_name, cur_blocks_seq = pre, None, num
                     cur_blocks_name = submodule_name
-
+                    print(f"new block: {model_id}/{cur_blocks_name} - {submodule_name}")
 
 
             if hasattr(submodule, "forward"):
@@ -1590,37 +1620,45 @@ def profile(pipe_or_dict_of_modules, profile_no: profile_type = profile_type.Ve
     # transformer (video or image generator) should be as small as possible not to occupy space that could be used by actual image data
     # on the other hand the text encoder should be quite large (as long as it fits in 10 GB of VRAM) to reduce sequence offloading
 
-
+    budgets = {}
+    if "transformer" in modules:
+        budgets["transformer"] = 1200
+
     extraModelsToQuantize = None
     asyncTransfers = True
-    budgets = None
 
     if profile_no == profile_type.HighRAM_HighVRAM:
        pinnedMemory= True
        budgets = None
-       info = "You have chosen a profile that requires at least 48 GB of RAM and 24 GB of VRAM. Some VRAM is
+       info = "You have chosen a profile that requires at least 48 GB of RAM and 24 GB of VRAM. Some VRAM is consumed just to make the model runs faster."
    elif profile_no == profile_type.HighRAM_LowVRAM:
        pinnedMemory= True
-       budgets =
+       budgets["*"] = 3000
        info = "You have chosen a profile that requires at least 48 GB of RAM and 12 GB of VRAM. Some RAM is consumed to reduce VRAM consumption."
    elif profile_no == profile_type.LowRAM_HighVRAM:
        pinnedMemory= "transformer"
        extraModelsToQuantize = default_extraModelsToQuantize
+       budgets = None
        info = "You have chosen a Medium speed profile that requires at least 32 GB of RAM and 24 GB of VRAM. Some VRAM is consuming just to make the model runs faster"
    elif profile_no == profile_type.LowRAM_LowVRAM:
        pinnedMemory= "transformer"
        extraModelsToQuantize = default_extraModelsToQuantize
-       budgets=
+       budgets["*"] = 3000
        info = "You have chosen a profile that requires at least 32 GB of RAM and 12 GB of VRAM. Some RAM is consumed to reduce VRAM consumption. "
    elif profile_no == profile_type.VerylowRAM_LowVRAM:
        pinnedMemory= False
        extraModelsToQuantize = default_extraModelsToQuantize
-       budgets=
-
-
+       budgets["*"] = 3000
+       if "transformer" in modules:
+           budgets["transformer"] = 400
+       #asyncTransfers = False
        info = "You have chosen the slowest profile that requires at least 24 GB of RAM and 10 GB of VRAM."
    else:
        raise Exception("Unknown profile")
+
+   if budgets != None and len(budgets) == 0:
+       budgets = None
+
    CrLf = '\r\n'
    kwargs = { "pinnedMemory": pinnedMemory, "extraModelsToQuantize" : extraModelsToQuantize, "budgets": budgets, "asyncTransfers" : asyncTransfers, "quantizeTransformer": quantizeTransformer }
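
The profile presets are now built on that wildcard mechanism instead of the truncated literals of 3.1.1: each profile starts from budgets = {"transformer": 1200} when a transformer is present, the low-VRAM profiles set budgets["*"] = 3000, VerylowRAM_LowVRAM tightens the transformer budget to 400 MB, and the high-VRAM profiles reset budgets to None (as does the final empty-dict check). Selecting a profile is unchanged; for instance, following the import style of the package README:

    from mmgp import offload, profile_type

    # Slowest preset: at least 24 GB of RAM and 10 GB of VRAM per the info message
    offload.profile(pipe, profile_type.VerylowRAM_LowVRAM)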
mmgp-3.1.3.dist-info/RECORD
ADDED
@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=JB40Ky84Njhuf2BauLvNhH_-IS_27lhfYuLqVVhmJtA,71080
+mmgp/safetensors2.py,sha256=OkJAvENfWeb-PL0FcxS1-eYeHLbemTaNXYvNxURrzIs,16154
+mmgp-3.1.3.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.1.3.dist-info/METADATA,sha256=pfkzWdQKY-7wNEMN66pwUPxfmXDGZSjJpBwvYolUDb4,12708
+mmgp-3.1.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+mmgp-3.1.3.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.1.3.dist-info/RECORD,,
mmgp-3.1.1.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=h74eKyWVZmDM--l4KbiZYXdpkcGM8ySUgyvkFtFRtNQ,69593
-mmgp/safetensors2.py,sha256=OkJAvENfWeb-PL0FcxS1-eYeHLbemTaNXYvNxURrzIs,16154
-mmgp-3.1.1.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-3.1.1.dist-info/METADATA,sha256=wtHNzulNFaWmruVO4cGgcRuIIN2eHPHo47nkgGMOWqw,12708
-mmgp-3.1.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-mmgp-3.1.1.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.1.1.dist-info/RECORD,,
{mmgp-3.1.1.dist-info → mmgp-3.1.3.dist-info}/LICENSE.md
File without changes

{mmgp-3.1.1.dist-info → mmgp-3.1.3.dist-info}/WHEEL
File without changes

{mmgp-3.1.1.dist-info → mmgp-3.1.3.dist-info}/top_level.txt
File without changes