mmgp 3.3.3__py3-none-any.whl → 3.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mmgp has been flagged as possibly problematic.

mmgp/offload.py CHANGED
@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.3.3 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.3.4 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -591,7 +591,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.3.3) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.3.4) by DeepBeepMeep ************{ENDC}{UNBOLD}")
 
 def _extract_num_from_str(num_in_str):
     size = len(num_in_str)
@@ -877,17 +877,15 @@ def split_linear_modules(model, map ):
 
 def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_loras = True, check_only = False, ignore_model_variations = False, pinnedLora = False, split_linear_modules_map = None, preprocess_sd = None, verboseLevel = -1,):
     verboseLevel = _compute_verbose_level(verboseLevel)
-    modules_dict = {k: v for k,v in model.named_modules()}
 
+    loras_model_data = getattr(model, "_loras_model_data", None)
+    if loras_model_data == None:
+        raise Exception(f"No Loras has been declared for this model while creating the corresponding offload object")
+
     if not check_only:
-        loras_model_data = dict()
-        model._loras_model_data = loras_model_data
-        loras_active_adapters = set()
-        model._loras_active_adapters = loras_active_adapters
-        loras_scaling = dict()
-        model._loras_scaling = loras_scaling
-        loras_tied_weights = dict()
-        model._loras_tied_weights = loras_tied_weights
+        unload_loras_from_model(model)
+
+    modules_dict = {k: v for k,v in model.named_modules()}
 
     CrLf = '\r\n'
     error_msg = ""
@@ -927,9 +925,6 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
         skip = False
         state_dict = safetensors2.torch_load_file(path, writable_tensors= False)
 
-
-
-
         if preprocess_sd != None:
             state_dict = preprocess_sd(state_dict)
 
@@ -1045,9 +1040,10 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
                 break
         if not check_only:
             loras_module_data = loras_model_data.get(module, None)
-            if loras_module_data == None:
-                loras_module_data = dict()
-                loras_model_data[module] = loras_module_data
+            assert loras_module_data != None
+            # if loras_module_data == None:
+            #     loras_module_data = dict()
+            #     loras_model_data[module] = loras_module_data
             loras_adapter_data = loras_module_data.get(adapter_name, None)
             lora_A = None if lora_A == None else lora_A.to(torch.bfloat16)
             lora_B = None if lora_B == None else lora_B.to(torch.bfloat16)
@@ -1108,12 +1104,17 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
     return new_lora_path
 
 def unload_loras_from_model(model):
-    model._loras_model_data = None
+    for _, v in model._loras_model_data.items():
+        v.clear()
+
+    model._loras_active_adapters = set()
+    model._loras_scaling = dict()
+    model._loras_tied_weights = dict()
     model._loras_errors = None
     model._loras_adapters = None
-    model._loras_active_adapters = None
     model._loras_scaling = None
 
+
 def set_step_no_for_lora(model, step_no):
     model._lora_step_no = step_no
 
@@ -1857,14 +1858,14 @@ class offload:
         return result
 
 
-    def hook_lora_linear(self, submodule, current_model, model_id, submodule_name):
+    def hook_lora_linear(self, submodule, current_model, model_id, loras_model_data, submodule_name):
         old_forward = submodule.forward
+
+        loras_data = {}
+        loras_model_data[submodule] = loras_data
+
         def lora_linear_forward(module, *args, **kwargs):
-            loras_model_data = getattr(current_model, "_loras_model_data", None)
-            loras_data = None
-            if loras_model_data != None:
-                loras_data = loras_model_data.get(submodule, None)
-            if loras_data == None:
+            if len(loras_data) == 0:
                 return old_forward(*args, **kwargs)
             else:
                 return self._lora_linear_forward(current_model, submodule, loras_data, *args, **kwargs)
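
In 3.3.4 the per-module loras_data dict is created once at hook time and captured by the wrapper, so the per-call cost when no adapter is active is a single len() check. A rough, self-contained sketch of this wrapping pattern on a plain torch.nn.Linear; the hook_linear helper and the (A, B, scale) layout are illustrative assumptions, not mmgp's actual internals:

import torch

def hook_linear(linear: torch.nn.Linear):
    # Wrap the layer's forward once; adapters added to loras_data later are
    # picked up automatically because the closure holds a reference to it.
    old_forward = linear.forward
    loras_data = {}   # e.g. loras_data[name] = (lora_A, lora_B, scale)

    def lora_forward(*args, **kwargs):
        out = old_forward(*args, **kwargs)
        if len(loras_data) == 0:          # fast path: no adapter loaded
            return out
        x = args[0]
        for lora_A, lora_B, scale in loras_data.values():
            out = out + scale * (x @ lora_A.t() @ lora_B.t())
        return out

    linear.forward = lora_forward
    return loras_data

layer = torch.nn.Linear(8, 4)
adapters = hook_linear(layer)
x = torch.randn(2, 8)
y_plain = layer(x)                                              # no adapters yet
adapters["demo"] = (torch.zeros(3, 8), torch.zeros(4, 3), 1.0)  # rank-3 pair
y_lora = layer(x)                                               # includes the (zero) LoRA term
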
@@ -2271,7 +2272,10 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
         current_budget = model_budgets[model_id]
         cur_blocks_prefix, prev_blocks_name, cur_blocks_name,cur_blocks_seq, is_mod_seq = None, None, None, -1, False
         self.loaded_blocks[model_id] = None
-        any_lora = loras !=None and model_id in loras or getattr(current_model, "_loras_model_data", False)
+        any_lora = loras !=None and model_id in loras
+        if any_lora:
+            loras_model_data = {}
+            current_model._loras_model_data = loras_model_data
         for submodule_name, submodule in current_model.named_modules():
             # create a fake 'accelerate' parameter so that the _execution_device property returns always "cuda"
             # (it is queried in many pipelines even if offloading is not properly implemented)
@@ -2304,7 +2308,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
 
             if hasattr(submodule, "forward"):
                 if any_lora and isinstance(submodule, torch.nn.Linear):
-                    submodule_method = self.hook_lora_linear(submodule, current_model, model_id, submodule_name)
+                    submodule_method = self.hook_lora_linear(submodule, current_model, model_id, loras_model_data, submodule_name)
                 else:
                     submodule_method = getattr(submodule, "forward")
                 if callable(submodule_method):
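
Taken together, these offload.py hunks move the creation of _loras_model_data into the offload setup: a model must be declared as LoRA-capable when the offload object is built, otherwise load_loras_into_model now raises. A hedged usage sketch of that ordering; the pipeline dict, file names, and the exact form of the loras argument are assumptions based only on the hunks above, not documented API:

from mmgp import offload

pipe = {"transformer": transformer}   # placeholder model dict
# Declare up front which model(s) will receive LoRAs (creates _loras_model_data).
offload.all(pipe, pinnedMemory=False, loras=["transformer"])
# Load adapters afterwards; with the 3.3.4 change this raises if the model
# was not declared above.
offload.load_loras_into_model(transformer, ["my_lora.safetensors"], lora_multi=[1.0])
# Unloading now clears the adapter dicts in place; the forward hooks stay installed.
offload.unload_loras_from_model(transformer)
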
mmgp-3.3.3.dist-info/METADATA → mmgp-3.3.4.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.3.3
+Version: 3.3.4
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
@@ -17,7 +17,7 @@ Dynamic: license-file
 
 
 <p align="center">
-<H2>Memory Management 3.3.3 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.3.4 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
mmgp-3.3.4.dist-info/RECORD ADDED
@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=WpQK1af2g0qcAm32EguTX8oBHZGKumPX2EqYS-df69Y,106583
+mmgp/safetensors2.py,sha256=rmUbBmK3Dra5prUTTRSVi6-XUFAa9Mj6B5CNPgzt9To,17333
+mmgp-3.3.4.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.3.4.dist-info/METADATA,sha256=Yk2eSpNITRDHK0lclsP6VXhW0_5hkUNVvXSfk25f7Ds,16154
+mmgp-3.3.4.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
+mmgp-3.3.4.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.3.4.dist-info/RECORD,,
mmgp-3.3.3.dist-info/RECORD DELETED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=R0UbOXEGAFKd_6090o8v5CkVmJiWmHDQsww7A3-LZEU,106550
-mmgp/safetensors2.py,sha256=rmUbBmK3Dra5prUTTRSVi6-XUFAa9Mj6B5CNPgzt9To,17333
-mmgp-3.3.3.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-3.3.3.dist-info/METADATA,sha256=xcODp7uhIfvy7Il1xEp8ed2VYmH1Eln-EnLy3MM4VGM,16153
-mmgp-3.3.3.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
-mmgp-3.3.3.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.3.3.dist-info/RECORD,,
File without changes